ia64/xen-unstable

view xen/drivers/block/xen_vbd.c @ 1044:b51f4782dd4d

bitkeeper revision 1.683 (400c33c2I0UquUQa-QChvyqrE7-GRA)

xen_vbd.c:
Fix loop bounds when setting vbd extents.
author kaf24@scramble.cl.cam.ac.uk
date Mon Jan 19 19:45:06 2004 +0000 (2004-01-19)
parents 7c6c07befbdc
children a4c34e4a7c07
line source
/******************************************************************************
 * xen_vbd.c
 *
 * Routines for managing virtual block devices.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 */

#include <xeno/config.h>
#include <xeno/types.h>
#include <xeno/lib.h>
#include <asm/io.h>
#include <xeno/slab.h>
#include <xeno/sched.h>
#include <xeno/vbd.h>
#include <xeno/blkdev.h>
#include <xeno/keyhandler.h>
#include <asm/current.h>
#include <asm/domain_page.h>

#include <hypervisor-ifs/hypervisor-if.h>
#include <xeno/event.h>

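/*
 * Data-structure overview (as implemented below): each domain owns a
 * red-black tree of vbd_t nodes (p->vbd_rb, guarded by p->vbd_lock), keyed
 * by virtual device number. Each vbd_t in turn carries a singly-linked list
 * of xen_extent_le_t records, and the VBD's logical sector space is the
 * concatenation of those extents in list order (see vbd_translate() at the
 * bottom of this file).
 */
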
long __vbd_create(struct task_struct *p,
                  unsigned short vdevice,
                  unsigned char mode,
                  unsigned char type)
{
    vbd_t *vbd;
    rb_node_t **rb_p, *rb_parent = NULL;
    long ret = 0;
    unsigned long cpu_mask;

    spin_lock(&p->vbd_lock);

    rb_p = &p->vbd_rb.rb_node;
    while ( *rb_p != NULL )
    {
        rb_parent = *rb_p;
        vbd = rb_entry(rb_parent, vbd_t, rb);
        if ( vdevice < vbd->vdevice )
        {
            rb_p = &rb_parent->rb_left;
        }
        else if ( vdevice > vbd->vdevice )
        {
            rb_p = &rb_parent->rb_right;
        }
        else
        {
            DPRINTK("vbd_create attempted for already existing vbd\n");
            ret = -EINVAL;
            goto out;
        }
    }

    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
    {
        DPRINTK("vbd_create: out of memory\n");
        ret = -ENOMEM;
        goto out;
    }

    vbd->vdevice = vdevice;
    vbd->mode    = mode;
    vbd->type    = type;
    vbd->extents = NULL;

    rb_link_node(&vbd->rb, rb_parent, rb_p);
    rb_insert_color(&vbd->rb, &p->vbd_rb);

    cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD);
    guest_event_notify(cpu_mask);

 out:
    spin_unlock(&p->vbd_lock);
    return ret;
}

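/*
 * The descent loop above records both the prospective parent (rb_parent)
 * and the address of the child slot that should point at the new node
 * (rb_p); rb_link_node() splices the node in at that slot and
 * rb_insert_color() rebalances. A minimal sketch of the same idiom on a
 * standalone tree (hypothetical my_node_t with an embedded rb_node_t 'rb'
 * and an int key -- not part of this file):
 *
 *     rb_node_t **pp = &root.rb_node, *parent = NULL;
 *     my_node_t *n;
 *     while ( *pp != NULL )
 *     {
 *         parent = *pp;
 *         n = rb_entry(parent, my_node_t, rb);
 *         pp = (key < n->key) ? &parent->rb_left : &parent->rb_right;
 *     }
 *     rb_link_node(&new_node->rb, parent, pp);
 *     rb_insert_color(&new_node->rb, &root);
 */
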
long vbd_create(vbd_create_t *create)
{
    struct task_struct *p;
    long rc;

    if ( unlikely(!IS_PRIV(current)) )
        return -EPERM;

    if ( unlikely((p = find_domain_by_id(create->domain)) == NULL) )
    {
        DPRINTK("vbd_create attempted for non-existent domain %d\n",
                create->domain);
        return -EINVAL;
    }

    rc = __vbd_create(p, create->vdevice, create->mode,
                      XD_TYPE_DISK | XD_FLAG_VIRT);

    put_task_struct(p);

    return rc;
}

long __vbd_grow(struct task_struct *p,
                unsigned short vdevice,
                xen_extent_t *extent)
{
    xen_extent_le_t **px, *x;
    vbd_t *vbd = NULL;
    rb_node_t *rb;
    long ret = 0;
    unsigned long cpu_mask;

    spin_lock(&p->vbd_lock);

    rb = p->vbd_rb.rb_node;
    while ( rb != NULL )
    {
        vbd = rb_entry(rb, vbd_t, rb);
        if ( vdevice < vbd->vdevice )
            rb = rb->rb_left;
        else if ( vdevice > vbd->vdevice )
            rb = rb->rb_right;
        else
            break;
    }

    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
    {
        DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
        ret = -EINVAL;
        goto out;
    }

    if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) )
    {
        DPRINTK("vbd_grow: out of memory\n");
        ret = -ENOMEM;
        goto out;
    }

    x->extent.device       = extent->device;
    x->extent.start_sector = extent->start_sector;
    x->extent.nr_sectors   = extent->nr_sectors;
    x->next                = NULL;

    /* Walk to the final 'next' slot so the new extent is appended in order. */
    for ( px = &vbd->extents; *px != NULL; px = &(*px)->next )
        continue;

    *px = x;

    cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD);
    guest_event_notify(cpu_mask);

 out:
    spin_unlock(&p->vbd_lock);
    return ret;
}

/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
long vbd_grow(vbd_grow_t *grow)
{
    struct task_struct *p;
    long rc;

    if ( unlikely(!IS_PRIV(current)) )
        return -EPERM;

    if ( unlikely((p = find_domain_by_id(grow->domain)) == NULL) )
    {
        DPRINTK("vbd_grow: attempted for non-existent domain %d\n",
                grow->domain);
        return -EINVAL;
    }

    rc = __vbd_grow(p, grow->vdevice, &grow->extent);

    put_task_struct(p);

    return rc;
}

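/*
 * A minimal sketch of how a privileged control tool might drive these two
 * handlers to build a one-extent VBD (the field values, 'mode_rw' constant
 * and 'phys_dev' identifier are hypothetical; the real definitions live in
 * the public interface headers):
 *
 *     vbd_create_t c = { .domain = 3, .vdevice = 0x301, .mode = mode_rw };
 *     vbd_create(&c);
 *     vbd_grow_t g = { .domain = 3, .vdevice = 0x301,
 *                      .extent = { .device       = phys_dev,
 *                                  .start_sector = 0,
 *                                  .nr_sectors   = 1048576 } };
 *     vbd_grow(&g);
 *
 * After both calls succeed, domain 3 sees a 512MB virtual disk at virtual
 * device 0x301, backed by the first 1048576 sectors of phys_dev.
 */
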
/* Shrink a VBD by removing the extent most recently appended to it. */
long vbd_shrink(vbd_shrink_t *shrink)
{
    struct task_struct *p;
    xen_extent_le_t **px, *x;
    vbd_t *vbd = NULL;
    rb_node_t *rb;
    long ret = 0;
    unsigned long cpu_mask;

    if ( !IS_PRIV(current) )
        return -EPERM;

    if ( (p = find_domain_by_id(shrink->domain)) == NULL )
    {
        DPRINTK("vbd_shrink attempted for non-existent domain %d\n",
                shrink->domain);
        return -EINVAL;
    }

    spin_lock(&p->vbd_lock);

    rb = p->vbd_rb.rb_node;
    while ( rb != NULL )
    {
        vbd = rb_entry(rb, vbd_t, rb);
        if ( shrink->vdevice < vbd->vdevice )
            rb = rb->rb_left;
        else if ( shrink->vdevice > vbd->vdevice )
            rb = rb->rb_right;
        else
            break;
    }

    if ( unlikely(vbd == NULL) ||
         unlikely(vbd->vdevice != shrink->vdevice) ||
         unlikely(vbd->extents == NULL) )
    {
        DPRINTK("vbd_shrink: attempt to remove non-existent extent.\n");
        ret = -EINVAL;
        goto out;
    }

    /* Find the last extent. We now know that there is at least one. */
    for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
        continue;

    x   = *px;
    *px = x->next;
    kfree(x);

    cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD);
    guest_event_notify(cpu_mask);

 out:
    spin_unlock(&p->vbd_lock);
    put_task_struct(p);
    return ret;
}

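/*
 * Note that shrinking is strictly LIFO: the extent unlinked above is always
 * the one most recently appended by vbd_grow(), so a grow immediately
 * followed by a shrink restores the previous extent list exactly.
 */
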
/* Replace a VBD's entire extent list with a new one supplied by the caller. */
long vbd_setextents(vbd_setextents_t *setextents)
{
    struct task_struct *p;
    xen_extent_t e;
    xen_extent_le_t *new_extents, *x, *t;
    vbd_t *vbd = NULL;
    rb_node_t *rb;
    int i;
    long ret = 0;
    unsigned long cpu_mask;

    if ( !IS_PRIV(current) )
        return -EPERM;

    if ( (p = find_domain_by_id(setextents->domain)) == NULL )
    {
        DPRINTK("vbd_setextents attempted for non-existent domain %d\n",
                setextents->domain);
        return -EINVAL;
    }

    spin_lock(&p->vbd_lock);

    rb = p->vbd_rb.rb_node;
    while ( rb != NULL )
    {
        vbd = rb_entry(rb, vbd_t, rb);
        if ( setextents->vdevice < vbd->vdevice )
            rb = rb->rb_left;
        else if ( setextents->vdevice > vbd->vdevice )
            rb = rb->rb_right;
        else
            break;
    }

    if ( unlikely(vbd == NULL) ||
         unlikely(vbd->vdevice != setextents->vdevice) )
    {
        DPRINTK("vbd_setextents: attempt to modify non-existent VBD.\n");
        ret = -EINVAL;
        goto out;
    }

    /*
     * Construct the new extent list. Iterating from nr_extents-1 down to 0
     * and prepending each element leaves the finished list in the caller's
     * original order.
     */
    new_extents = NULL;
    for ( i = setextents->nr_extents - 1; i >= 0; i-- )
    {
        if ( unlikely(copy_from_user(&e,
                                     &setextents->extents[i],
                                     sizeof(e)) != 0) )
        {
            DPRINTK("vbd_setextents: copy_from_user failed\n");
            ret = -EFAULT;
            goto free_and_out;
        }

        if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL))
                      == NULL) )
        {
            DPRINTK("vbd_setextents: out of memory\n");
            ret = -ENOMEM;
            goto free_and_out;
        }

        x->extent = e;
        x->next   = new_extents;

        new_extents = x;
    }

    /* Delete the old extent list _after_ successfully creating the new. */
    for ( x = vbd->extents; x != NULL; x = t )
    {
        t = x->next;
        kfree(x);
    }

    /* Make the new list visible. */
    vbd->extents = new_extents;

    cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD);
    guest_event_notify(cpu_mask);

 out:
    spin_unlock(&p->vbd_lock);
    put_task_struct(p);
    return ret;

 free_and_out:
    /* Failed part-way through the new list. Delete all that we managed. */
    for ( x = new_extents; x != NULL; x = t )
    {
        t = x->next;
        kfree(x);
    }
    goto out;
}

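/*
 * The construct-then-swap sequence above gives all-or-nothing semantics:
 * the replacement list is built in full from user data before the old list
 * is freed, so a failed copy_from_user() or allocation leaves the VBD
 * untouched. The same idiom in miniature (hypothetical names, a sketch
 * only):
 *
 *     new_list = build_from_user(req);    // may fail; old list still live
 *     if ( new_list == NULL )
 *         return -ENOMEM;                 // caller observes no change
 *     old = obj->list;
 *     obj->list = new_list;               // single pointer write = switch
 *     free_list(old);
 */
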
long vbd_delete(vbd_delete_t *delete)
{
    struct task_struct *p;
    vbd_t *vbd;
    rb_node_t *rb;
    xen_extent_le_t *x, *t;
    unsigned long cpu_mask;

    if ( !IS_PRIV(current) )
        return -EPERM;

    if ( (p = find_domain_by_id(delete->domain)) == NULL )
    {
        DPRINTK("vbd_delete attempted for non-existent domain %d\n",
                delete->domain);
        return -EINVAL;
    }

    spin_lock(&p->vbd_lock);

    rb = p->vbd_rb.rb_node;
    while ( rb != NULL )
    {
        vbd = rb_entry(rb, vbd_t, rb);
        if ( delete->vdevice < vbd->vdevice )
            rb = rb->rb_left;
        else if ( delete->vdevice > vbd->vdevice )
            rb = rb->rb_right;
        else
            goto found;
    }

    DPRINTK("vbd_delete attempted for non-existent VBD.\n");

    spin_unlock(&p->vbd_lock);
    put_task_struct(p);
    return -EINVAL;

 found:
    rb_erase(rb, &p->vbd_rb);
    x = vbd->extents;   /* Sample the list head before freeing the vbd. */
    kfree(vbd);

    while ( x != NULL )
    {
        t = x->next;
        kfree(x);
        x = t;
    }

    cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD);
    guest_event_notify(cpu_mask);

    spin_unlock(&p->vbd_lock);
    put_task_struct(p);
    return 0;
}

/* Tear down all VBDs, and their extent lists, belonging to domain 'p'. */
void destroy_all_vbds(struct task_struct *p)
{
    vbd_t *vbd;
    rb_node_t *rb;
    xen_extent_le_t *x, *t;
    unsigned long cpu_mask;

    spin_lock(&p->vbd_lock);

    while ( (rb = p->vbd_rb.rb_node) != NULL )
    {
        vbd = rb_entry(rb, vbd_t, rb);

        rb_erase(rb, &p->vbd_rb);
        x = vbd->extents;
        kfree(vbd);

        while ( x != NULL )
        {
            t = x->next;
            kfree(x);
            x = t;
        }
    }

    cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD);
    guest_event_notify(cpu_mask);

    spin_unlock(&p->vbd_lock);
}

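/*
 * Draining the tree by repeatedly erasing whichever node is currently at
 * the root is simple and correct: rb_erase() leaves a valid tree after each
 * removal, so the loop terminates exactly when p->vbd_rb.rb_node becomes
 * NULL.
 */
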
static int vbd_probe_single(xen_disk_info_t *xdi,
                            vbd_t *vbd,
                            struct task_struct *p)
{
    xen_extent_le_t *x;
    xen_disk_t cur_disk;

    if ( xdi->count == xdi->max )
    {
        DPRINTK("vbd_probe_devices: out of space for probe.\n");
        return -ENOMEM;
    }

    cur_disk.device = vbd->vdevice;
    cur_disk.info   = vbd->type;
    if ( !VBD_CAN_WRITE(vbd) )
        cur_disk.info |= XD_FLAG_RO;
    cur_disk.capacity = 0;
    for ( x = vbd->extents; x != NULL; x = x->next )
        cur_disk.capacity += x->extent.nr_sectors;
    cur_disk.domain = p->domain;

    /* Now copy into the relevant part of the user-space buffer. */
    if ( copy_to_user(&xdi->disks[xdi->count],
                      &cur_disk,
                      sizeof(xen_disk_t)) )
    {
        DPRINTK("vbd_probe_devices: copy_to_user failed\n");
        return -EFAULT;
    }

    xdi->count++;

    return 0;
}

static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
{
    int rc = 0;
    rb_node_t *rb;

    spin_lock(&p->vbd_lock);

    if ( (rb = p->vbd_rb.rb_node) == NULL )
        goto out;

 new_subtree:
    /* STEP 1. Find least node (it'll be left-most). */
    while ( rb->rb_left != NULL )
        rb = rb->rb_left;

    for ( ; ; )
    {
        /* STEP 2. Dealt with left subtree. Now process current node. */
        if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
            goto out;

        /* STEP 3. Process right subtree, if any. */
        if ( rb->rb_right != NULL )
        {
            rb = rb->rb_right;
            goto new_subtree;
        }

        /* STEP 4. Done both subtrees. Head back through ancestors. */
        for ( ; ; )
        {
            /* We're done when we get back to the root node. */
            if ( rb->rb_parent == NULL )
                goto out;
            /* If we are left of parent, then parent is next to process. */
            if ( rb->rb_parent->rb_left == rb )
                break;
            /* If we are right of parent, then we climb to grandparent. */
            rb = rb->rb_parent;
        }

        rb = rb->rb_parent;
    }

 out:
    spin_unlock(&p->vbd_lock);
    return rc;
}

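/*
 * The goto-based walk above is an in-order traversal written without
 * recursion or an explicit stack: it descends via rb_left/rb_right and
 * climbs back via rb_parent. A recursive equivalent, shown only as a
 * sketch (depth-proportional stack use is presumably what the iterative
 * form avoids):
 *
 *     static int vbd_probe_subtree(xen_disk_info_t *xdi, rb_node_t *rb,
 *                                  struct task_struct *p)
 *     {
 *         int rc;
 *         if ( rb == NULL )
 *             return 0;
 *         if ( (rc = vbd_probe_subtree(xdi, rb->rb_left, p)) != 0 )
 *             return rc;
 *         if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
 *             return rc;
 *         return vbd_probe_subtree(xdi, rb->rb_right, p);
 *     }
 */
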
/*
 * Return information about the VBDs available for a given domain, or for all
 * domains; in the general case the 'domain' argument will be 0, which means
 * "information about the caller"; otherwise the 'domain' argument will
 * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of
 * these cases require the caller to be privileged.
 */
long vbd_probe(vbd_probe_t *probe)
{
    struct task_struct *p = NULL;
    unsigned long flags;
    long ret = 0;

    if ( probe->domain != 0 )
    {
        /* We can only probe for ourselves (unless we're privileged). */
        if ( (probe->domain != current->domain) && !IS_PRIV(current) )
            return -EPERM;

        if ( (probe->domain != VBD_PROBE_ALL) &&
             ((p = find_domain_by_id(probe->domain)) == NULL) )
        {
            DPRINTK("vbd_probe attempted for non-existent domain %d\n",
                    probe->domain);
            return -EINVAL;
        }
    }
    else
    {
        /* Default is to probe for ourselves. */
        p = current;
        get_task_struct(p); /* to mirror final put_task_struct */
    }

    if ( probe->domain == VBD_PROBE_ALL )
    {
        /*
         * Iterate with a separate cursor: 'p' stays NULL in this branch, so
         * the common exit path below doesn't drop a reference we never took.
         */
        struct task_struct *t;
        read_lock_irqsave(&tasklist_lock, flags);
        t = &idle0_task;
        while ( (t = t->next_task) != &idle0_task )
        {
            if ( !is_idle_task(t) )
            {
                if ( (ret = vbd_probe_devices(&probe->xdi, t)) != 0 )
                {
                    read_unlock_irqrestore(&tasklist_lock, flags);
                    goto out;
                }
            }
        }
        read_unlock_irqrestore(&tasklist_lock, flags);
    }
    else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
        goto out;

 out:
    if ( ret != 0 )
        DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret);
    if ( p != NULL )
        put_task_struct(p);
    return ret;
}

long vbd_info(vbd_info_t *info)
{
    struct task_struct *p;
    xen_extent_le_t *x;
    xen_extent_t *extents;
    vbd_t *vbd = NULL;
    rb_node_t *rb;
    long ret = 0;

    if ( (info->domain != current->domain) && !IS_PRIV(current) )
        return -EPERM;

    if ( (p = find_domain_by_id(info->domain)) == NULL )
    {
        DPRINTK("vbd_info attempted for non-existent domain %d\n",
                info->domain);
        return -EINVAL;
    }

    spin_lock(&p->vbd_lock);

    rb = p->vbd_rb.rb_node;
    while ( rb != NULL )
    {
        vbd = rb_entry(rb, vbd_t, rb);
        if ( info->vdevice < vbd->vdevice )
            rb = rb->rb_left;
        else if ( info->vdevice > vbd->vdevice )
            rb = rb->rb_right;
        else
            break;
    }

    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) )
    {
        DPRINTK("vbd_info attempted on non-existent VBD.\n");
        ret = -EINVAL;
        goto out;
    }

    info->mode     = vbd->mode;
    info->nextents = 0;

    extents = info->extents;
    for ( x = vbd->extents; x != NULL; x = x->next )
    {
        if ( info->nextents == info->maxextents )
            break;
        if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) )
        {
            DPRINTK("vbd_info: copy_to_user failed\n");
            ret = -EFAULT;
            goto out;
        }
        extents++;
        info->nextents++;
    }

 out:
    spin_unlock(&p->vbd_lock);
    put_task_struct(p);
    return ret;
}

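/*
 * Note the truncation behaviour above: if the VBD has more extents than
 * info->maxextents, the copy loop stops early and the call still returns 0,
 * with info->nextents reporting how many extents were actually copied. A
 * caller that needs the complete list must retry with a larger buffer.
 */
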
int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
{
    xen_extent_le_t *x;
    vbd_t *vbd;
    rb_node_t *rb;
    unsigned long sec_off, nr_secs;

    spin_lock(&p->vbd_lock);

    rb = p->vbd_rb.rb_node;
    while ( rb != NULL )
    {
        vbd = rb_entry(rb, vbd_t, rb);
        if ( pseg->dev < vbd->vdevice )
            rb = rb->rb_left;
        else if ( pseg->dev > vbd->vdevice )
            rb = rb->rb_right;
        else
            goto found;
    }

    DPRINTK("vbd_translate: domain %d attempted to access "
            "non-existent VBD.\n", p->domain);

    spin_unlock(&p->vbd_lock);
    return -ENODEV;

 found:

    if ( ((operation == READ) && !VBD_CAN_READ(vbd)) ||
         ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) )
    {
        spin_unlock(&p->vbd_lock);
        return -EACCES;
    }

    /*
     * Now iterate through the list of xen_extents, working out which should
     * be used to perform the translation.
     */
    sec_off = pseg->sector_number;
    nr_secs = pseg->nr_sects;
    for ( x = vbd->extents; x != NULL; x = x->next )
    {
        if ( sec_off < x->extent.nr_sectors )
        {
            pseg->dev = x->extent.device;
            pseg->sector_number = x->extent.start_sector + sec_off;
            if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) )
                goto overrun;
            spin_unlock(&p->vbd_lock);
            return 1;
        }
        sec_off -= x->extent.nr_sectors;
    }

    DPRINTK("vbd_translate: end of vbd.\n");
    spin_unlock(&p->vbd_lock);
    return -EACCES;

    /*
     * Here we deal with overrun onto the following extent. We don't deal
     * with overrun of more than one boundary since each request is
     * restricted to 2^9 512-byte sectors, so it should be trivial for
     * control software to ensure that extents are large enough to prevent
     * excessive overrun.
     */
 overrun:

    /* Adjust length of first chunk to run to end of first extent. */
    pseg[0].nr_sects = x->extent.nr_sectors - sec_off;

    /* Set second chunk buffer and length to start where first chunk ended. */
    pseg[1].buffer   = pseg[0].buffer + (pseg[0].nr_sects << 9);
    pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;

    /* Now move to the next extent. Check it exists and is long enough! */
    if ( unlikely((x = x->next) == NULL) ||
         unlikely(x->extent.nr_sectors < pseg[1].nr_sects) )
    {
        DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
        spin_unlock(&p->vbd_lock);
        return -EACCES;
    }

    /* Store the real device and start sector for the second chunk. */
    pseg[1].dev           = x->extent.device;
    pseg[1].sector_number = x->extent.start_sector;

    spin_unlock(&p->vbd_lock);
    return 2;
}

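/*
 * A worked example of the translation arithmetic, assuming a VBD made of
 * two extents of 1000 sectors each (extent 0 at start_sector 5000 on device
 * A, extent 1 at start_sector 9000 on device B; all values hypothetical),
 * and a 10-sector request at virtual sector 995:
 *
 *   - sec_off = 995 < 1000, so extent 0 is selected;
 *   - 995 + 10 > 1000, so the request overruns into the next extent;
 *   - chunk 0: device A, sector 5000 + 995 = 5995, 1000 - 995 = 5 sectors;
 *   - chunk 1: device B, sector 9000, 10 - 5 = 5 sectors, with the buffer
 *     advanced by 5 << 9 = 2560 bytes;
 *   - the return value of 2 tells the caller that both pseg entries are
 *     valid.
 */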