ia64/linux-2.6.18-xen.hg

view drivers/md/dm-ioctl.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
3 * Copyright (C) 2004 - 2006 Red Hat, Inc. All rights reserved.
4 *
5 * This file is released under the GPL.
6 */
8 #include "dm.h"
10 #include <linux/module.h>
11 #include <linux/vmalloc.h>
12 #include <linux/miscdevice.h>
13 #include <linux/init.h>
14 #include <linux/wait.h>
15 #include <linux/slab.h>
16 #include <linux/dm-ioctl.h>
17 #include <linux/hdreg.h>
19 #include <asm/uaccess.h>
21 #define DM_MSG_PREFIX "ioctl"
22 #define DM_DRIVER_EMAIL "dm-devel@redhat.com"
24 /*-----------------------------------------------------------------
25 * The ioctl interface needs to be able to look up devices by
26 * name or uuid.
27 *---------------------------------------------------------------*/
/*
 * One hash_cell is created per device by dm_hash_insert() and destroyed
 * by __hash_remove().
 */
28 struct hash_cell {
/* Chain links into _name_buckets[] and _uuid_buckets[] respectively. */
29 struct list_head name_list;
30 struct list_head uuid_list;
/* kstrdup() copies owned by the cell; uuid is NULL when none was given. */
32 char *name;
33 char *uuid;
/* Device described by this cell; dm_hash_insert() takes a dm_get() on it. */
34 struct mapped_device *md;
/* Table staged by table_load and consumed by do_resume; NULL if none. */
35 struct dm_table *new_map;
36 };
/*
 * Cursor handed to list_version_get_info() through dm_target_iterate()
 * while list_versions() fills the result buffer.
 */
38 struct vers_iter {
/* Copy of the ioctl's param_size; set by list_versions(). */
39 size_t param_size;
/* vers: record to fill next; old_vers: previous record (NULL at start). */
40 struct dm_target_versions *vers, *old_vers;
/* One past the last usable byte of the result buffer. */
41 char *end;
/* Set to DM_BUFFER_FULL_FLAG when a record does not fit. */
42 uint32_t flags;
43 };
/*
 * hash_str() reduces with "& MASK_BUCKETS", so NUM_BUCKETS has to stay a
 * power of two.
 */
46 #define NUM_BUCKETS 64
47 #define MASK_BUCKETS (NUM_BUCKETS - 1)
/* Device cells chained by name and by uuid. */
48 static struct list_head _name_buckets[NUM_BUCKETS];
49 static struct list_head _uuid_buckets[NUM_BUCKETS];
/* Forward declaration: dm_hash_exit() uses it before the definition. */
51 static void dm_hash_remove_all(int keep_open_devices);
53 /*
54 * Guards access to both hash tables.
55 */
56 static DECLARE_RWSEM(_hash_lock);
58 static void init_buckets(struct list_head *buckets)
59 {
60 unsigned int i;
62 for (i = 0; i < NUM_BUCKETS; i++)
63 INIT_LIST_HEAD(buckets + i);
64 }
66 static int dm_hash_init(void)
67 {
68 init_buckets(_name_buckets);
69 init_buckets(_uuid_buckets);
70 return 0;
71 }
/*
 * Tear down the hash tables, removing every device regardless of
 * whether it is open (keep_open_devices == 0).
 */
static void dm_hash_exit(void)
{
	const int keep_open_devices = 0;

	dm_hash_remove_all(keep_open_devices);
}
78 /*-----------------------------------------------------------------
79 * Hash function:
80 * We're not really concerned with the str hash function being
81 * fast since it's only used by the ioctl interface.
82 *---------------------------------------------------------------*/
83 static unsigned int hash_str(const char *str)
84 {
85 const unsigned int hash_mult = 2654435387U;
86 unsigned int h = 0;
88 while (*str)
89 h = (h + (unsigned int) *str++) * hash_mult;
91 return h & MASK_BUCKETS;
92 }
94 /*-----------------------------------------------------------------
95 * Code for looking up a device by name
96 *---------------------------------------------------------------*/
97 static struct hash_cell *__get_name_cell(const char *str)
98 {
99 struct hash_cell *hc;
100 unsigned int h = hash_str(str);
102 list_for_each_entry (hc, _name_buckets + h, name_list)
103 if (!strcmp(hc->name, str)) {
104 dm_get(hc->md);
105 return hc;
106 }
108 return NULL;
109 }
111 static struct hash_cell *__get_uuid_cell(const char *str)
112 {
113 struct hash_cell *hc;
114 unsigned int h = hash_str(str);
116 list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
117 if (!strcmp(hc->uuid, str)) {
118 dm_get(hc->md);
119 return hc;
120 }
122 return NULL;
123 }
125 /*-----------------------------------------------------------------
126 * Inserting, removing and renaming a device.
127 *---------------------------------------------------------------*/
128 static struct hash_cell *alloc_cell(const char *name, const char *uuid,
129 struct mapped_device *md)
130 {
131 struct hash_cell *hc;
133 hc = kmalloc(sizeof(*hc), GFP_KERNEL);
134 if (!hc)
135 return NULL;
137 hc->name = kstrdup(name, GFP_KERNEL);
138 if (!hc->name) {
139 kfree(hc);
140 return NULL;
141 }
143 if (!uuid)
144 hc->uuid = NULL;
146 else {
147 hc->uuid = kstrdup(uuid, GFP_KERNEL);
148 if (!hc->uuid) {
149 kfree(hc->name);
150 kfree(hc);
151 return NULL;
152 }
153 }
155 INIT_LIST_HEAD(&hc->name_list);
156 INIT_LIST_HEAD(&hc->uuid_list);
157 hc->md = md;
158 hc->new_map = NULL;
159 return hc;
160 }
162 static void free_cell(struct hash_cell *hc)
163 {
164 if (hc) {
165 kfree(hc->name);
166 kfree(hc->uuid);
167 kfree(hc);
168 }
169 }
171 /*
172 * The kdev_t and uuid of a device can never change once it is
173 * initially inserted.
174 */
175 static int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
176 {
177 struct hash_cell *cell, *hc;
179 /*
180 * Allocate the new cells.
181 */
182 cell = alloc_cell(name, uuid, md);
183 if (!cell)
184 return -ENOMEM;
186 /*
187 * Insert the cell into both hash tables.
188 */
189 down_write(&_hash_lock);
190 hc = __get_name_cell(name);
191 if (hc) {
192 dm_put(hc->md);
193 goto bad;
194 }
196 list_add(&cell->name_list, _name_buckets + hash_str(name));
198 if (uuid) {
199 hc = __get_uuid_cell(uuid);
200 if (hc) {
201 list_del(&cell->name_list);
202 dm_put(hc->md);
203 goto bad;
204 }
205 list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
206 }
207 dm_get(md);
208 dm_set_mdptr(md, cell);
209 up_write(&_hash_lock);
211 return 0;
213 bad:
214 up_write(&_hash_lock);
215 free_cell(cell);
216 return -EBUSY;
217 }
219 static void __hash_remove(struct hash_cell *hc)
220 {
221 struct dm_table *table;
223 /* remove from the dev hash */
224 list_del(&hc->uuid_list);
225 list_del(&hc->name_list);
226 dm_set_mdptr(hc->md, NULL);
228 table = dm_get_table(hc->md);
229 if (table) {
230 dm_table_event(table);
231 dm_table_put(table);
232 }
234 if (hc->new_map)
235 dm_table_put(hc->new_map);
236 dm_put(hc->md);
237 free_cell(hc);
238 }
240 static void dm_hash_remove_all(int keep_open_devices)
241 {
242 int i, dev_skipped, dev_removed;
243 struct hash_cell *hc;
244 struct list_head *tmp, *n;
246 down_write(&_hash_lock);
248 retry:
249 dev_skipped = dev_removed = 0;
250 for (i = 0; i < NUM_BUCKETS; i++) {
251 list_for_each_safe (tmp, n, _name_buckets + i) {
252 hc = list_entry(tmp, struct hash_cell, name_list);
254 if (keep_open_devices &&
255 dm_lock_for_deletion(hc->md)) {
256 dev_skipped++;
257 continue;
258 }
259 __hash_remove(hc);
260 dev_removed = 1;
261 }
262 }
264 /*
265 * Some mapped devices may be using other mapped devices, so if any
266 * still exist, repeat until we make no further progress.
267 */
268 if (dev_skipped) {
269 if (dev_removed)
270 goto retry;
272 DMWARN("remove_all left %d open device(s)", dev_skipped);
273 }
275 up_write(&_hash_lock);
276 }
278 static int dm_hash_rename(const char *old, const char *new)
279 {
280 char *new_name, *old_name;
281 struct hash_cell *hc;
282 struct dm_table *table;
284 /*
285 * duplicate new.
286 */
287 new_name = kstrdup(new, GFP_KERNEL);
288 if (!new_name)
289 return -ENOMEM;
291 down_write(&_hash_lock);
293 /*
294 * Is new free ?
295 */
296 hc = __get_name_cell(new);
297 if (hc) {
298 DMWARN("asked to rename to an already existing name %s -> %s",
299 old, new);
300 dm_put(hc->md);
301 up_write(&_hash_lock);
302 kfree(new_name);
303 return -EBUSY;
304 }
306 /*
307 * Is there such a device as 'old' ?
308 */
309 hc = __get_name_cell(old);
310 if (!hc) {
311 DMWARN("asked to rename a non existent device %s -> %s",
312 old, new);
313 up_write(&_hash_lock);
314 kfree(new_name);
315 return -ENXIO;
316 }
318 /*
319 * rename and move the name cell.
320 */
321 list_del(&hc->name_list);
322 old_name = hc->name;
323 hc->name = new_name;
324 list_add(&hc->name_list, _name_buckets + hash_str(new_name));
326 /*
327 * Wake up any dm event waiters.
328 */
329 table = dm_get_table(hc->md);
330 if (table) {
331 dm_table_event(table);
332 dm_table_put(table);
333 }
335 dm_put(hc->md);
336 up_write(&_hash_lock);
337 kfree(old_name);
338 return 0;
339 }
341 /*-----------------------------------------------------------------
342 * Implementation of the ioctl commands
343 *---------------------------------------------------------------*/
344 /*
345 * All the ioctl commands get dispatched to functions with this
346 * prototype.
 *
 * param is the kernel copy of the user's struct dm_ioctl and
 * param_size is the data_size the user supplied.  ctl_ioctl() resets
 * param->data_size to sizeof(*param) before the call and, when the
 * handler returns 0, copies param->data_size bytes back to userland.
347 */
348 typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size);
350 static int remove_all(struct dm_ioctl *param, size_t param_size)
351 {
352 dm_hash_remove_all(1);
353 param->data_size = 0;
354 return 0;
355 }
/*
 * Round 'ptr' up to the next 8-byte boundary; an address that is
 * already aligned is returned unchanged.
 */
#define ALIGN_MASK 7
static inline void *align_ptr(void *ptr)
{
	size_t bumped = (size_t) ptr + ALIGN_MASK;

	return (void *) (bumped & ~ALIGN_MASK);
}
366 /*
367 * Retrieves the data payload buffer from an already allocated
368 * struct dm_ioctl.
369 */
370 static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
371 size_t *len)
372 {
373 param->data_start = align_ptr(param + 1) - (void *) param;
375 if (param->data_start < param_size)
376 *len = param_size - param->data_start;
377 else
378 *len = 0;
380 return ((void *) param) + param->data_start;
381 }
383 static int list_devices(struct dm_ioctl *param, size_t param_size)
384 {
385 unsigned int i;
386 struct hash_cell *hc;
387 size_t len, needed = 0;
388 struct gendisk *disk;
389 struct dm_name_list *nl, *old_nl = NULL;
391 down_write(&_hash_lock);
393 /*
394 * Loop through all the devices working out how much
395 * space we need.
396 */
397 for (i = 0; i < NUM_BUCKETS; i++) {
398 list_for_each_entry (hc, _name_buckets + i, name_list) {
399 needed += sizeof(struct dm_name_list);
400 needed += strlen(hc->name) + 1;
401 needed += ALIGN_MASK;
402 }
403 }
405 /*
406 * Grab our output buffer.
407 */
408 nl = get_result_buffer(param, param_size, &len);
409 if (len < needed) {
410 param->flags |= DM_BUFFER_FULL_FLAG;
411 goto out;
412 }
413 param->data_size = param->data_start + needed;
415 nl->dev = 0; /* Flags no data */
417 /*
418 * Now loop through filling out the names.
419 */
420 for (i = 0; i < NUM_BUCKETS; i++) {
421 list_for_each_entry (hc, _name_buckets + i, name_list) {
422 if (old_nl)
423 old_nl->next = (uint32_t) ((void *) nl -
424 (void *) old_nl);
425 disk = dm_disk(hc->md);
426 nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
427 nl->next = 0;
428 strcpy(nl->name, hc->name);
430 old_nl = nl;
431 nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1);
432 }
433 }
435 out:
436 up_write(&_hash_lock);
437 return 0;
438 }
440 static void list_version_get_needed(struct target_type *tt, void *needed_param)
441 {
442 size_t *needed = needed_param;
444 *needed += sizeof(struct dm_target_versions);
445 *needed += strlen(tt->name);
446 *needed += ALIGN_MASK;
447 }
449 static void list_version_get_info(struct target_type *tt, void *param)
450 {
451 struct vers_iter *info = param;
453 /* Check space - it might have changed since the first iteration */
454 if ((char *)info->vers + sizeof(tt->version) + strlen(tt->name) + 1 >
455 info->end) {
457 info->flags = DM_BUFFER_FULL_FLAG;
458 return;
459 }
461 if (info->old_vers)
462 info->old_vers->next = (uint32_t) ((void *)info->vers -
463 (void *)info->old_vers);
464 info->vers->version[0] = tt->version[0];
465 info->vers->version[1] = tt->version[1];
466 info->vers->version[2] = tt->version[2];
467 info->vers->next = 0;
468 strcpy(info->vers->name, tt->name);
470 info->old_vers = info->vers;
471 info->vers = align_ptr(((void *) ++info->vers) + strlen(tt->name) + 1);
472 }
474 static int list_versions(struct dm_ioctl *param, size_t param_size)
475 {
476 size_t len, needed = 0;
477 struct dm_target_versions *vers;
478 struct vers_iter iter_info;
480 /*
481 * Loop through all the devices working out how much
482 * space we need.
483 */
484 dm_target_iterate(list_version_get_needed, &needed);
486 /*
487 * Grab our output buffer.
488 */
489 vers = get_result_buffer(param, param_size, &len);
490 if (len < needed) {
491 param->flags |= DM_BUFFER_FULL_FLAG;
492 goto out;
493 }
494 param->data_size = param->data_start + needed;
496 iter_info.param_size = param_size;
497 iter_info.old_vers = NULL;
498 iter_info.vers = vers;
499 iter_info.flags = 0;
500 iter_info.end = (char *)vers+len;
502 /*
503 * Now loop through filling out the names & versions.
504 */
505 dm_target_iterate(list_version_get_info, &iter_info);
506 param->flags |= iter_info.flags;
508 out:
509 return 0;
510 }
/*
 * A device name may not contain '/'.  Returns 0 if 'name' is acceptable
 * and -EINVAL (after logging a warning) otherwise.
 */
static int check_name(const char *name)
{
	if (!strchr(name, '/'))
		return 0;

	DMWARN("invalid device name");
	return -EINVAL;
}
524 /*
525 * Fills in a dm_ioctl structure, ready for sending back to
526 * userland.
527 */
528 static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
529 {
530 struct gendisk *disk = dm_disk(md);
531 struct dm_table *table;
533 param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
534 DM_ACTIVE_PRESENT_FLAG);
536 if (dm_suspended(md))
537 param->flags |= DM_SUSPEND_FLAG;
539 param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
541 /*
542 * Yes, this will be out of date by the time it gets back
543 * to userland, but it is still very useful for
544 * debugging.
545 */
546 param->open_count = dm_open_count(md);
548 if (disk->policy)
549 param->flags |= DM_READONLY_FLAG;
551 param->event_nr = dm_get_event_nr(md);
553 table = dm_get_table(md);
554 if (table) {
555 param->flags |= DM_ACTIVE_PRESENT_FLAG;
556 param->target_count = dm_table_get_num_targets(table);
557 dm_table_put(table);
558 } else
559 param->target_count = 0;
561 return 0;
562 }
564 static int dev_create(struct dm_ioctl *param, size_t param_size)
565 {
566 int r, m = DM_ANY_MINOR;
567 struct mapped_device *md;
569 r = check_name(param->name);
570 if (r)
571 return r;
573 if (param->flags & DM_PERSISTENT_DEV_FLAG)
574 m = MINOR(huge_decode_dev(param->dev));
576 r = dm_create(m, &md);
577 if (r)
578 return r;
580 r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
581 if (r) {
582 dm_put(md);
583 return r;
584 }
586 param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
588 r = __dev_status(md, param);
589 dm_put(md);
591 return r;
592 }
594 /*
595 * Always use UUID for lookups if it's present, otherwise use name or dev.
596 */
597 static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
598 {
599 struct mapped_device *md;
600 void *mdptr = NULL;
602 if (*param->uuid)
603 return __get_uuid_cell(param->uuid);
605 if (*param->name)
606 return __get_name_cell(param->name);
608 md = dm_get_md(huge_decode_dev(param->dev));
609 if (md)
610 mdptr = dm_get_mdptr(md);
612 return mdptr;
613 }
615 static struct mapped_device *find_device(struct dm_ioctl *param)
616 {
617 struct hash_cell *hc;
618 struct mapped_device *md = NULL;
620 down_read(&_hash_lock);
621 hc = __find_device_hash_cell(param);
622 if (hc) {
623 md = hc->md;
625 /*
626 * Sneakily write in both the name and the uuid
627 * while we have the cell.
628 */
629 strncpy(param->name, hc->name, sizeof(param->name));
630 if (hc->uuid)
631 strncpy(param->uuid, hc->uuid, sizeof(param->uuid)-1);
632 else
633 param->uuid[0] = '\0';
635 if (hc->new_map)
636 param->flags |= DM_INACTIVE_PRESENT_FLAG;
637 else
638 param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
639 }
640 up_read(&_hash_lock);
642 return md;
643 }
645 static int dev_remove(struct dm_ioctl *param, size_t param_size)
646 {
647 struct hash_cell *hc;
648 struct mapped_device *md;
649 int r;
651 down_write(&_hash_lock);
652 hc = __find_device_hash_cell(param);
654 if (!hc) {
655 DMWARN("device doesn't appear to be in the dev hash table.");
656 up_write(&_hash_lock);
657 return -ENXIO;
658 }
660 md = hc->md;
662 /*
663 * Ensure the device is not open and nothing further can open it.
664 */
665 r = dm_lock_for_deletion(md);
666 if (r) {
667 DMWARN("unable to remove open device %s", hc->name);
668 up_write(&_hash_lock);
669 dm_put(md);
670 return r;
671 }
673 __hash_remove(hc);
674 up_write(&_hash_lock);
675 dm_put(md);
676 param->data_size = 0;
677 return 0;
678 }
/*
 * Check a string doesn't overrun the chunk of memory we copied from
 * userland: returns 0 if a NUL byte occurs in [str, end) and -EINVAL
 * otherwise, including when str is not below end.
 */
static int invalid_str(char *str, void *end)
{
	char *limit = end;
	char *p;

	for (p = str; p < limit; p++) {
		if (*p == '\0')
			return 0;
	}

	return -EINVAL;
}
693 static int dev_rename(struct dm_ioctl *param, size_t param_size)
694 {
695 int r;
696 char *new_name = (char *) param + param->data_start;
698 if (new_name < (char *) (param + 1) ||
699 invalid_str(new_name, (void *) param + param_size)) {
700 DMWARN("Invalid new logical volume name supplied.");
701 return -EINVAL;
702 }
704 r = check_name(new_name);
705 if (r)
706 return r;
708 param->data_size = 0;
709 return dm_hash_rename(param->name, new_name);
710 }
712 static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
713 {
714 int r = -EINVAL, x;
715 struct mapped_device *md;
716 struct hd_geometry geometry;
717 unsigned long indata[4];
718 char *geostr = (char *) param + param->data_start;
720 md = find_device(param);
721 if (!md)
722 return -ENXIO;
724 if (geostr < (char *) (param + 1) ||
725 invalid_str(geostr, (void *) param + param_size)) {
726 DMWARN("Invalid geometry supplied.");
727 goto out;
728 }
730 x = sscanf(geostr, "%lu %lu %lu %lu", indata,
731 indata + 1, indata + 2, indata + 3);
733 if (x != 4) {
734 DMWARN("Unable to interpret geometry settings.");
735 goto out;
736 }
738 if (indata[0] > 65535 || indata[1] > 255 ||
739 indata[2] > 255 || indata[3] > ULONG_MAX) {
740 DMWARN("Geometry exceeds range limits.");
741 goto out;
742 }
744 geometry.cylinders = indata[0];
745 geometry.heads = indata[1];
746 geometry.sectors = indata[2];
747 geometry.start = indata[3];
749 r = dm_set_geometry(md, &geometry);
750 if (!r)
751 r = __dev_status(md, param);
753 param->data_size = 0;
755 out:
756 dm_put(md);
757 return r;
758 }
760 static int do_suspend(struct dm_ioctl *param)
761 {
762 int r = 0;
763 int do_lockfs = 1;
764 struct mapped_device *md;
766 md = find_device(param);
767 if (!md)
768 return -ENXIO;
770 if (param->flags & DM_SKIP_LOCKFS_FLAG)
771 do_lockfs = 0;
773 if (!dm_suspended(md))
774 r = dm_suspend(md, do_lockfs);
776 if (!r)
777 r = __dev_status(md, param);
779 dm_put(md);
780 return r;
781 }
783 static int do_resume(struct dm_ioctl *param)
784 {
785 int r = 0;
786 int do_lockfs = 1;
787 struct hash_cell *hc;
788 struct mapped_device *md;
789 struct dm_table *new_map;
791 down_write(&_hash_lock);
793 hc = __find_device_hash_cell(param);
794 if (!hc) {
795 DMWARN("device doesn't appear to be in the dev hash table.");
796 up_write(&_hash_lock);
797 return -ENXIO;
798 }
800 md = hc->md;
802 new_map = hc->new_map;
803 hc->new_map = NULL;
804 param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
806 up_write(&_hash_lock);
808 /* Do we need to load a new map ? */
809 if (new_map) {
810 /* Suspend if it isn't already suspended */
811 if (param->flags & DM_SKIP_LOCKFS_FLAG)
812 do_lockfs = 0;
813 if (!dm_suspended(md))
814 dm_suspend(md, do_lockfs);
816 r = dm_swap_table(md, new_map);
817 if (r) {
818 dm_put(md);
819 dm_table_put(new_map);
820 return r;
821 }
823 if (dm_table_get_mode(new_map) & FMODE_WRITE)
824 set_disk_ro(dm_disk(md), 0);
825 else
826 set_disk_ro(dm_disk(md), 1);
828 dm_table_put(new_map);
829 }
831 if (dm_suspended(md))
832 r = dm_resume(md);
834 if (!r)
835 r = __dev_status(md, param);
837 dm_put(md);
838 return r;
839 }
841 /*
842 * Set or unset the suspension state of a device.
843 * If the device already is in the requested state we just return its status.
844 */
845 static int dev_suspend(struct dm_ioctl *param, size_t param_size)
846 {
847 if (param->flags & DM_SUSPEND_FLAG)
848 return do_suspend(param);
850 return do_resume(param);
851 }
/*
 * Copies device info back to user space, used by
 * the create and info ioctls.  Returns -ENXIO if the device is
 * unknown, otherwise the result of __dev_status().
 */
static int dev_status(struct dm_ioctl *param, size_t param_size)
{
	int r;
	struct mapped_device *md = find_device(param);

	if (!md)
		return -ENXIO;

	r = __dev_status(md, param);
	dm_put(md);

	return r;
}
871 /*
872 * Build up the status struct for each target
 *
 * For every target in 'table' a struct dm_target_spec followed by the
 * target's status (or table, with DM_STATUS_TABLE_FLAG) string is
 * written to the payload area.  When space runs out,
 * DM_BUFFER_FULL_FLAG is set and the loop stops.  param->data_size is
 * updated only if at least one target was written completely;
 * param->target_count is always set to the table's target count.
873 */
874 static void retrieve_status(struct dm_table *table,
875 struct dm_ioctl *param, size_t param_size)
876 {
877 unsigned int i, num_targets;
878 struct dm_target_spec *spec;
879 char *outbuf, *outptr;
880 status_type_t type;
881 size_t remaining, len, used = 0;
883 outptr = outbuf = get_result_buffer(param, param_size, &len);
885 if (param->flags & DM_STATUS_TABLE_FLAG)
886 type = STATUSTYPE_TABLE;
887 else
888 type = STATUSTYPE_INFO;
890 /* Get all the target info */
891 num_targets = dm_table_get_num_targets(table);
892 for (i = 0; i < num_targets; i++) {
893 struct dm_target *ti = dm_table_get_target(table, i);
/*
 * NOTE(review): 'remaining' is unsigned.  If align_ptr() at the bottom
 * of the previous iteration moved outptr beyond outbuf + len, this
 * subtraction wraps instead of going negative -- confirm len is always
 * a multiple of 8 here.
 */
895 remaining = len - (outptr - outbuf);
896 if (remaining <= sizeof(struct dm_target_spec)) {
897 param->flags |= DM_BUFFER_FULL_FLAG;
898 break;
899 }
901 spec = (struct dm_target_spec *) outptr;
903 spec->status = 0;
904 spec->sector_start = ti->begin;
905 spec->length = ti->len;
/* strncpy() leaves target_type unterminated if the name fills the field. */
906 strncpy(spec->target_type, ti->type->name,
907 sizeof(spec->target_type));
909 outptr += sizeof(struct dm_target_spec);
910 remaining = len - (outptr - outbuf);
/* remaining is a size_t, so this is only true when it is exactly 0. */
911 if (remaining <= 0) {
912 param->flags |= DM_BUFFER_FULL_FLAG;
913 break;
914 }
916 /* Get the status/table string from the target driver */
917 if (ti->type->status) {
/* A non-zero return from the status method is treated as "buffer full". */
918 if (ti->type->status(ti, type, outptr, remaining)) {
919 param->flags |= DM_BUFFER_FULL_FLAG;
920 break;
921 }
922 } else
923 outptr[0] = '\0';
/* 'used' is measured before alignment padding is added. */
925 outptr += strlen(outptr) + 1;
926 used = param->data_start + (outptr - outbuf);
/* spec->next is an offset from the start of the result buffer. */
928 outptr = align_ptr(outptr);
929 spec->next = outptr - outbuf;
930 }
932 if (used)
933 param->data_size = used;
935 param->target_count = num_targets;
936 }
938 /*
939 * Wait for a device to report an event
940 */
941 static int dev_wait(struct dm_ioctl *param, size_t param_size)
942 {
943 int r;
944 struct mapped_device *md;
945 struct dm_table *table;
947 md = find_device(param);
948 if (!md)
949 return -ENXIO;
951 /*
952 * Wait for a notification event
953 */
954 if (dm_wait_event(md, param->event_nr)) {
955 r = -ERESTARTSYS;
956 goto out;
957 }
959 /*
960 * The userland program is going to want to know what
961 * changed to trigger the event, so we may as well tell
962 * him and save an ioctl.
963 */
964 r = __dev_status(md, param);
965 if (r)
966 goto out;
968 table = dm_get_table(md);
969 if (table) {
970 retrieve_status(table, param, param_size);
971 dm_table_put(table);
972 }
974 out:
975 dm_put(md);
976 return r;
977 }
979 static inline int get_mode(struct dm_ioctl *param)
980 {
981 int mode = FMODE_READ | FMODE_WRITE;
983 if (param->flags & DM_READONLY_FLAG)
984 mode = FMODE_READ;
986 return mode;
987 }
989 static int next_target(struct dm_target_spec *last, uint32_t next, void *end,
990 struct dm_target_spec **spec, char **target_params)
991 {
992 *spec = (struct dm_target_spec *) ((unsigned char *) last + next);
993 *target_params = (char *) (*spec + 1);
995 if (*spec < (last + 1))
996 return -EINVAL;
998 return invalid_str(*target_params, end);
999 }
1001 static int populate_table(struct dm_table *table,
1002 struct dm_ioctl *param, size_t param_size)
1004 int r;
1005 unsigned int i = 0;
1006 struct dm_target_spec *spec = (struct dm_target_spec *) param;
1007 uint32_t next = param->data_start;
1008 void *end = (void *) param + param_size;
1009 char *target_params;
1011 if (!param->target_count) {
1012 DMWARN("populate_table: no targets specified");
1013 return -EINVAL;
1016 for (i = 0; i < param->target_count; i++) {
1018 r = next_target(spec, next, end, &spec, &target_params);
1019 if (r) {
1020 DMWARN("unable to find target");
1021 return r;
1024 r = dm_table_add_target(table, spec->target_type,
1025 (sector_t) spec->sector_start,
1026 (sector_t) spec->length,
1027 target_params);
1028 if (r) {
1029 DMWARN("error adding target to table");
1030 return r;
1033 next = spec->next;
1036 return dm_table_complete(table);
1039 static int table_load(struct dm_ioctl *param, size_t param_size)
1041 int r;
1042 struct hash_cell *hc;
1043 struct dm_table *t;
1044 struct mapped_device *md;
1046 md = find_device(param);
1047 if (!md)
1048 return -ENXIO;
1050 r = dm_table_create(&t, get_mode(param), param->target_count, md);
1051 if (r)
1052 goto out;
1054 r = populate_table(t, param, param_size);
1055 if (r) {
1056 dm_table_put(t);
1057 goto out;
1060 down_write(&_hash_lock);
1061 hc = dm_get_mdptr(md);
1062 if (!hc || hc->md != md) {
1063 DMWARN("device has been removed from the dev hash table.");
1064 dm_table_put(t);
1065 up_write(&_hash_lock);
1066 r = -ENXIO;
1067 goto out;
1070 if (hc->new_map)
1071 dm_table_put(hc->new_map);
1072 hc->new_map = t;
1073 up_write(&_hash_lock);
1075 param->flags |= DM_INACTIVE_PRESENT_FLAG;
1076 r = __dev_status(md, param);
1078 out:
1079 dm_put(md);
1081 return r;
1084 static int table_clear(struct dm_ioctl *param, size_t param_size)
1086 int r;
1087 struct hash_cell *hc;
1088 struct mapped_device *md;
1090 down_write(&_hash_lock);
1092 hc = __find_device_hash_cell(param);
1093 if (!hc) {
1094 DMWARN("device doesn't appear to be in the dev hash table.");
1095 up_write(&_hash_lock);
1096 return -ENXIO;
1099 if (hc->new_map) {
1100 dm_table_put(hc->new_map);
1101 hc->new_map = NULL;
1104 param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
1106 r = __dev_status(hc->md, param);
1107 md = hc->md;
1108 up_write(&_hash_lock);
1109 dm_put(md);
1110 return r;
1113 /*
1114 * Retrieves a list of devices used by a particular dm device.
1115 */
1116 static void retrieve_deps(struct dm_table *table,
1117 struct dm_ioctl *param, size_t param_size)
1119 unsigned int count = 0;
1120 struct list_head *tmp;
1121 size_t len, needed;
1122 struct dm_dev *dd;
1123 struct dm_target_deps *deps;
1125 deps = get_result_buffer(param, param_size, &len);
1127 /*
1128 * Count the devices.
1129 */
1130 list_for_each (tmp, dm_table_get_devices(table))
1131 count++;
1133 /*
1134 * Check we have enough space.
1135 */
1136 needed = sizeof(*deps) + (sizeof(*deps->dev) * count);
1137 if (len < needed) {
1138 param->flags |= DM_BUFFER_FULL_FLAG;
1139 return;
1142 /*
1143 * Fill in the devices.
1144 */
1145 deps->count = count;
1146 count = 0;
1147 list_for_each_entry (dd, dm_table_get_devices(table), list)
1148 deps->dev[count++] = huge_encode_dev(dd->bdev->bd_dev);
1150 param->data_size = param->data_start + needed;
/*
 * DM_TABLE_DEPS_CMD handler: report the device status and, if the
 * device has a table, the devices that table uses.  Returns -ENXIO if
 * the device is unknown, else the result of __dev_status().
 */
static int table_deps(struct dm_ioctl *param, size_t param_size)
{
	int r;
	struct mapped_device *md;
	struct dm_table *live;

	md = find_device(param);
	if (!md)
		return -ENXIO;

	r = __dev_status(md, param);
	if (!r) {
		live = dm_get_table(md);
		if (live) {
			retrieve_deps(live, param, param_size);
			dm_table_put(live);
		}
	}

	dm_put(md);
	return r;
}
/*
 * Return the status of a device as a text string for each
 * target.  Returns -ENXIO if the device is unknown, else the result
 * of __dev_status().
 */
static int table_status(struct dm_ioctl *param, size_t param_size)
{
	int r;
	struct mapped_device *md;
	struct dm_table *live;

	md = find_device(param);
	if (!md)
		return -ENXIO;

	r = __dev_status(md, param);
	if (!r) {
		live = dm_get_table(md);
		if (live) {
			retrieve_status(live, param, param_size);
			dm_table_put(live);
		}
	}

	dm_put(md);
	return r;
}
1207 /*
1208 * Pass a message to the target that's at the supplied device offset.
1209 */
1210 static int target_message(struct dm_ioctl *param, size_t param_size)
1212 int r, argc;
1213 char **argv;
1214 struct mapped_device *md;
1215 struct dm_table *table;
1216 struct dm_target *ti;
1217 struct dm_target_msg *tmsg = (void *) param + param->data_start;
1219 md = find_device(param);
1220 if (!md)
1221 return -ENXIO;
1223 r = __dev_status(md, param);
1224 if (r)
1225 goto out;
1227 if (tmsg < (struct dm_target_msg *) (param + 1) ||
1228 invalid_str(tmsg->message, (void *) param + param_size)) {
1229 DMWARN("Invalid target message parameters.");
1230 r = -EINVAL;
1231 goto out;
1234 r = dm_split_args(&argc, &argv, tmsg->message);
1235 if (r) {
1236 DMWARN("Failed to split target message parameters");
1237 goto out;
1240 table = dm_get_table(md);
1241 if (!table)
1242 goto out_argv;
1244 if (tmsg->sector >= dm_table_get_size(table)) {
1245 DMWARN("Target message sector outside device.");
1246 r = -EINVAL;
1247 goto out_table;
1250 ti = dm_table_find_target(table, tmsg->sector);
1251 if (ti->type->message)
1252 r = ti->type->message(ti, argc, argv);
1253 else {
1254 DMWARN("Target type does not support messages");
1255 r = -EINVAL;
1258 out_table:
1259 dm_table_put(table);
1260 out_argv:
1261 kfree(argv);
1262 out:
1263 param->data_size = 0;
1264 dm_put(md);
1265 return r;
1268 /*-----------------------------------------------------------------
1269 * Implementation of open/close/ioctl on the special char
1270 * device.
1271 *---------------------------------------------------------------*/
1272 static ioctl_fn lookup_ioctl(unsigned int cmd)
1274 static struct {
1275 int cmd;
1276 ioctl_fn fn;
1277 } _ioctls[] = {
1278 {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */
1279 {DM_REMOVE_ALL_CMD, remove_all},
1280 {DM_LIST_DEVICES_CMD, list_devices},
1282 {DM_DEV_CREATE_CMD, dev_create},
1283 {DM_DEV_REMOVE_CMD, dev_remove},
1284 {DM_DEV_RENAME_CMD, dev_rename},
1285 {DM_DEV_SUSPEND_CMD, dev_suspend},
1286 {DM_DEV_STATUS_CMD, dev_status},
1287 {DM_DEV_WAIT_CMD, dev_wait},
1289 {DM_TABLE_LOAD_CMD, table_load},
1290 {DM_TABLE_CLEAR_CMD, table_clear},
1291 {DM_TABLE_DEPS_CMD, table_deps},
1292 {DM_TABLE_STATUS_CMD, table_status},
1294 {DM_LIST_VERSIONS_CMD, list_versions},
1296 {DM_TARGET_MSG_CMD, target_message},
1297 {DM_DEV_SET_GEOMETRY_CMD, dev_set_geometry}
1298 };
1300 return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
1303 /*
1304 * As well as checking the version compatibility this always
1305 * copies the kernel interface version out.
1306 */
1307 static int check_version(unsigned int cmd, struct dm_ioctl __user *user)
1309 uint32_t version[3];
1310 int r = 0;
1312 if (copy_from_user(version, user->version, sizeof(version)))
1313 return -EFAULT;
1315 if ((DM_VERSION_MAJOR != version[0]) ||
1316 (DM_VERSION_MINOR < version[1])) {
1317 DMWARN("ioctl interface mismatch: "
1318 "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
1319 DM_VERSION_MAJOR, DM_VERSION_MINOR,
1320 DM_VERSION_PATCHLEVEL,
1321 version[0], version[1], version[2], cmd);
1322 r = -EINVAL;
1325 /*
1326 * Fill in the kernel version.
1327 */
1328 version[0] = DM_VERSION_MAJOR;
1329 version[1] = DM_VERSION_MINOR;
1330 version[2] = DM_VERSION_PATCHLEVEL;
1331 if (copy_to_user(user->version, version, sizeof(version)))
1332 return -EFAULT;
1334 return r;
/*
 * Release a parameter block obtained from copy_params().
 */
static void free_params(struct dm_ioctl *param)
{
	struct dm_ioctl *block = param;

	vfree(block);
}
1342 static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
1344 struct dm_ioctl tmp, *dmi;
1346 if (copy_from_user(&tmp, user, sizeof(tmp)))
1347 return -EFAULT;
1349 if (tmp.data_size < sizeof(tmp))
1350 return -EINVAL;
1352 dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
1353 if (!dmi)
1354 return -ENOMEM;
1356 if (copy_from_user(dmi, user, tmp.data_size)) {
1357 vfree(dmi);
1358 return -EFAULT;
1361 *param = dmi;
1362 return 0;
1365 static int validate_params(uint cmd, struct dm_ioctl *param)
1367 /* Always clear this flag */
1368 param->flags &= ~DM_BUFFER_FULL_FLAG;
1370 /* Ignores parameters */
1371 if (cmd == DM_REMOVE_ALL_CMD ||
1372 cmd == DM_LIST_DEVICES_CMD ||
1373 cmd == DM_LIST_VERSIONS_CMD)
1374 return 0;
1376 if ((cmd == DM_DEV_CREATE_CMD)) {
1377 if (!*param->name) {
1378 DMWARN("name not supplied when creating device");
1379 return -EINVAL;
1381 } else if ((*param->uuid && *param->name)) {
1382 DMWARN("only supply one of name or uuid, cmd(%u)", cmd);
1383 return -EINVAL;
1386 /* Ensure strings are terminated */
1387 param->name[DM_NAME_LEN - 1] = '\0';
1388 param->uuid[DM_UUID_LEN - 1] = '\0';
1390 return 0;
1393 static int ctl_ioctl(struct inode *inode, struct file *file,
1394 uint command, ulong u)
1396 int r = 0;
1397 unsigned int cmd;
1398 struct dm_ioctl *param;
1399 struct dm_ioctl __user *user = (struct dm_ioctl __user *) u;
1400 ioctl_fn fn = NULL;
1401 size_t param_size;
1403 /* only root can play with this */
1404 if (!capable(CAP_SYS_ADMIN))
1405 return -EACCES;
1407 if (_IOC_TYPE(command) != DM_IOCTL)
1408 return -ENOTTY;
1410 cmd = _IOC_NR(command);
1412 /*
1413 * Check the interface version passed in. This also
1414 * writes out the kernel's interface version.
1415 */
1416 r = check_version(cmd, user);
1417 if (r)
1418 return r;
1420 /*
1421 * Nothing more to do for the version command.
1422 */
1423 if (cmd == DM_VERSION_CMD)
1424 return 0;
1426 fn = lookup_ioctl(cmd);
1427 if (!fn) {
1428 DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
1429 return -ENOTTY;
1432 /*
1433 * Trying to avoid low memory issues when a device is
1434 * suspended.
1435 */
1436 current->flags |= PF_MEMALLOC;
1438 /*
1439 * Copy the parameters into kernel space.
1440 */
1441 r = copy_params(user, &param);
1443 current->flags &= ~PF_MEMALLOC;
1445 if (r)
1446 return r;
1448 r = validate_params(cmd, param);
1449 if (r)
1450 goto out;
1452 param_size = param->data_size;
1453 param->data_size = sizeof(*param);
1454 r = fn(param, param_size);
1456 /*
1457 * Copy the results back to userland.
1458 */
1459 if (!r && copy_to_user(user, param, param->data_size))
1460 r = -EFAULT;
1462 out:
1463 free_params(param);
1464 return r;
/* File operations of the control device: only ioctl is provided. */
1467 static struct file_operations _ctl_fops = {
1468 .ioctl = ctl_ioctl,
1469 .owner = THIS_MODULE,
1470 };
/*
 * Misc-device description of the control node: dynamically assigned
 * minor, named DM_NAME, served by _ctl_fops.
 */
1472 static struct miscdevice _dm_misc = {
1473 .minor = MISC_DYNAMIC_MINOR,
1474 .name = DM_NAME,
1475 .fops = &_ctl_fops
1476 };
1478 /*
1479 * Create misc character device and link to DM_DIR/control.
1480 */
1481 int __init dm_interface_init(void)
1483 int r;
1485 r = dm_hash_init();
1486 if (r)
1487 return r;
1489 r = misc_register(&_dm_misc);
1490 if (r) {
1491 DMERR("misc_register failed for control device");
1492 dm_hash_exit();
1493 return r;
1496 DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR,
1497 DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA,
1498 DM_DRIVER_EMAIL);
1499 return 0;
1502 void dm_interface_exit(void)
1504 if (misc_deregister(&_dm_misc) < 0)
1505 DMERR("misc_deregister failed for control device");
1507 dm_hash_exit();