direct-io.hg

view xen/common/dom0_ops.c @ 2693:2584528df9e1

bitkeeper revision 1.1159.123.2 (4177d169N58TtQXn_XJO4xNBKbMQUw)

Merge freefall.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into freefall.cl.cam.ac.uk:/local/scratch/kaf24/xeno
author kaf24@freefall.cl.cam.ac.uk
date Thu Oct 21 15:10:33 2004 +0000 (2004-10-21)
parents d7ee91c5f868 d8e27145f1eb
children 744573a15e95 bffd969a5a07
line source
1 /******************************************************************************
2 * dom0_ops.c
3 *
4 * Process command requests from domain-0 guest OS.
5 *
6 * Copyright (c) 2002, K A Fraser
7 */
9 #include <xen/config.h>
10 #include <xen/types.h>
11 #include <xen/lib.h>
12 #include <xen/mm.h>
13 #include <hypervisor-ifs/dom0_ops.h>
14 #include <xen/sched.h>
15 #include <xen/event.h>
16 #include <asm/domain_page.h>
17 #include <asm/pdb.h>
18 #include <xen/trace.h>
19 #include <xen/console.h>
20 #include <asm/shadow.h>
21 #include <hypervisor-ifs/sched_ctl.h>
23 #define TRC_DOM0OP_ENTER_BASE 0x00020000
24 #define TRC_DOM0OP_LEAVE_BASE 0x00030000
26 extern unsigned int alloc_new_dom_mem(struct domain *, unsigned int);
27 extern long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op);
28 extern void arch_getdomaininfo_ctxt(
29 struct domain *, full_execution_context_t *);
31 static inline int is_free_domid(domid_t dom)
32 {
33 struct domain *d;
35 if ( dom >= DOMID_FIRST_RESERVED )
36 return 0;
38 if ( (d = find_domain_by_id(dom)) == NULL )
39 return 1;
41 put_domain(d);
42 return 0;
43 }
45 /*
46 * Allocate a free domain id. We try to reuse domain ids in a fairly low range,
47 * only expanding the range when there are no free domain ids. This is to keep
48 * domain ids in a range depending on the number that exist simultaneously,
49 * rather than incrementing domain ids in the full 32-bit range.
50 */
51 static int allocate_domid(domid_t *pdom)
52 {
53 static spinlock_t domid_lock = SPIN_LOCK_UNLOCKED;
54 static domid_t curdom = 0;
55 static domid_t topdom = 101;
56 int err = 0;
57 domid_t dom;
59 spin_lock(&domid_lock);
61 /* Try to use a domain id in the range 0..topdom, starting at curdom. */
62 for ( dom = curdom + 1; dom != curdom; dom++ )
63 {
64 if ( dom == topdom )
65 dom = 1;
66 if ( is_free_domid(dom) )
67 goto exit;
68 }
70 /* Couldn't find a free domain id in 0..topdom, try higher. */
71 for ( dom = topdom; dom < DOMID_FIRST_RESERVED; dom++ )
72 {
73 if ( is_free_domid(dom) )
74 {
75 topdom = dom + 1;
76 goto exit;
77 }
78 }
80 /* No free domain ids. */
81 err = -ENOMEM;
83 exit:
84 if ( err == 0 )
85 {
86 curdom = dom;
87 *pdom = dom;
88 }
90 spin_unlock(&domid_lock);
91 return err;
92 }
94 long do_dom0_op(dom0_op_t *u_dom0_op)
95 {
96 long ret = 0;
97 dom0_op_t curop, *op = &curop;
99 if ( !IS_PRIV(current) )
100 return -EPERM;
102 if ( copy_from_user(op, u_dom0_op, sizeof(*op)) )
103 {
104 return -EFAULT;
105 }
107 if ( op->interface_version != DOM0_INTERFACE_VERSION )
108 {
109 return -EACCES;
110 }
112 TRACE_5D(TRC_DOM0OP_ENTER_BASE + op->cmd,
113 0, op->u.dummy[0], op->u.dummy[1],
114 op->u.dummy[2], op->u.dummy[3] );
116 switch ( op->cmd )
117 {
119 case DOM0_BUILDDOMAIN:
120 {
121 struct domain *d = find_domain_by_id(op->u.builddomain.domain);
122 ret = -EINVAL;
123 if ( d != NULL )
124 {
125 ret = final_setup_guestos(d, &op->u.builddomain);
126 put_domain(d);
127 }
128 }
129 break;
131 case DOM0_PAUSEDOMAIN:
132 {
133 struct domain *d = find_domain_by_id(op->u.pausedomain.domain);
134 ret = -ESRCH;
135 if ( d != NULL )
136 {
137 ret = -EINVAL;
138 if ( d != current )
139 {
140 domain_pause_by_systemcontroller(d);
141 ret = 0;
142 }
143 put_domain(d);
144 }
145 }
146 break;
148 case DOM0_UNPAUSEDOMAIN:
149 {
150 struct domain *d = find_domain_by_id(op->u.unpausedomain.domain);
151 ret = -ESRCH;
152 if ( d != NULL )
153 {
154 ret = -EINVAL;
155 if ( test_bit(DF_CONSTRUCTED, &d->flags) )
156 {
157 domain_unpause_by_systemcontroller(d);
158 ret = 0;
159 }
160 put_domain(d);
161 }
162 }
163 break;
165 case DOM0_CREATEDOMAIN:
166 {
167 struct domain *d;
168 unsigned int pro;
169 domid_t dom;
171 dom = op->u.createdomain.domain;
172 if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
173 {
174 ret = -EINVAL;
175 if ( !is_free_domid(dom) )
176 break;
177 }
178 else if ( (ret = allocate_domid(&dom)) != 0 )
179 break;
181 if ( op->u.createdomain.cpu == -1 )
182 {
183 /* Do an initial placement. Fix me for hyperthreading! */
184 struct domain *d;
185 int i, j=0, c[smp_num_cpus];
187 pro=0; /* keep compiler happy */
189 for (i=0;i<smp_num_cpus;i++)
190 c[i]=0;
192 for_each_domain ( d ) {
193 c[d->processor]++;
194 j++;
195 }
197 for (i=0;i<smp_num_cpus;i++) {
198 if( c[i]<j )
199 {
200 j = c[i];
201 pro = i;
202 }
203 }
204 }
205 else
206 pro = op->u.createdomain.cpu % smp_num_cpus;
208 ret = -ENOMEM;
209 if ( (d = do_createdomain(dom, pro)) == NULL )
210 break;
212 ret = alloc_new_dom_mem(d, op->u.createdomain.memory_kb);
213 if ( ret != 0 )
214 {
215 domain_kill(d);
216 break;
217 }
219 ret = 0;
221 op->u.createdomain.domain = d->domain;
222 copy_to_user(u_dom0_op, op, sizeof(*op));
223 }
224 break;
226 case DOM0_DESTROYDOMAIN:
227 {
228 struct domain *d = find_domain_by_id(op->u.destroydomain.domain);
229 ret = -ESRCH;
230 if ( d != NULL )
231 {
232 ret = -EINVAL;
233 if ( d != current )
234 {
235 domain_kill(d);
236 ret = 0;
237 }
238 put_domain(d);
239 }
240 }
241 break;
243 case DOM0_PINCPUDOMAIN:
244 {
245 domid_t dom = op->u.pincpudomain.domain;
246 struct domain *d = find_domain_by_id(dom);
247 int cpu = op->u.pincpudomain.cpu;
249 if ( d == NULL )
250 {
251 ret = -ESRCH;
252 break;
253 }
255 if ( d == current )
256 {
257 ret = -EINVAL;
258 put_domain(d);
259 break;
260 }
262 if ( cpu == -1 )
263 {
264 clear_bit(DF_CPUPINNED, &d->flags);
265 }
266 else
267 {
268 domain_pause(d);
269 synchronise_pagetables(~0UL);
270 if ( d->processor != (cpu % smp_num_cpus) )
271 set_bit(DF_MIGRATED, &d->flags);
272 set_bit(DF_CPUPINNED, &d->flags);
273 d->processor = cpu % smp_num_cpus;
274 domain_unpause(d);
275 }
277 put_domain(d);
278 }
279 break;
281 case DOM0_SCHEDCTL:
282 {
283 ret = sched_ctl(&op->u.schedctl);
284 copy_to_user(u_dom0_op, op, sizeof(*op));
285 }
286 break;
288 case DOM0_ADJUSTDOM:
289 {
290 ret = sched_adjdom(&op->u.adjustdom);
291 copy_to_user(u_dom0_op, op, sizeof(*op));
292 }
293 break;
295 case DOM0_GETMEMLIST:
296 {
297 int i;
298 struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
299 unsigned long max_pfns = op->u.getmemlist.max_pfns;
300 unsigned long pfn;
301 unsigned long *buffer = op->u.getmemlist.buffer;
302 struct list_head *list_ent;
304 ret = -EINVAL;
305 if ( d != NULL )
306 {
307 ret = 0;
309 spin_lock(&d->page_alloc_lock);
310 list_ent = d->page_list.next;
311 for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
312 {
313 pfn = list_entry(list_ent, struct pfn_info, list) -
314 frame_table;
315 if ( put_user(pfn, buffer) )
316 {
317 ret = -EFAULT;
318 break;
319 }
320 buffer++;
321 list_ent = frame_table[pfn].list.next;
322 }
323 spin_unlock(&d->page_alloc_lock);
325 op->u.getmemlist.num_pfns = i;
326 copy_to_user(u_dom0_op, op, sizeof(*op));
328 put_domain(d);
329 }
330 }
331 break;
333 case DOM0_GETDOMAININFO:
334 {
335 full_execution_context_t *c;
336 struct domain *d;
337 unsigned long flags;
339 read_lock_irqsave(&tasklist_lock, flags);
341 for_each_domain ( d )
342 {
343 if ( d->domain >= op->u.getdomaininfo.domain )
344 break;
345 }
347 if ( (d == NULL) || !get_domain(d) )
348 {
349 read_unlock_irqrestore(&tasklist_lock, flags);
350 ret = -ESRCH;
351 break;
352 }
354 read_unlock_irqrestore(&tasklist_lock, flags);
356 op->u.getdomaininfo.domain = d->domain;
358 op->u.getdomaininfo.flags =
359 (test_bit(DF_DYING, &d->flags) ? DOMFLAGS_DYING : 0) |
360 (test_bit(DF_CRASHED, &d->flags) ? DOMFLAGS_CRASHED : 0) |
361 (test_bit(DF_SHUTDOWN, &d->flags) ? DOMFLAGS_SHUTDOWN : 0) |
362 (test_bit(DF_CTRLPAUSE, &d->flags) ? DOMFLAGS_PAUSED : 0) |
363 (test_bit(DF_BLOCKED, &d->flags) ? DOMFLAGS_BLOCKED : 0) |
364 (test_bit(DF_RUNNING, &d->flags) ? DOMFLAGS_RUNNING : 0);
366 op->u.getdomaininfo.flags |= d->processor << DOMFLAGS_CPUSHIFT;
367 op->u.getdomaininfo.flags |=
368 d->shutdown_code << DOMFLAGS_SHUTDOWNSHIFT;
370 op->u.getdomaininfo.tot_pages = d->tot_pages;
371 op->u.getdomaininfo.max_pages = d->max_pages;
372 op->u.getdomaininfo.cpu_time = d->cpu_time;
373 op->u.getdomaininfo.shared_info_frame =
374 __pa(d->shared_info) >> PAGE_SHIFT;
376 if ( op->u.getdomaininfo.ctxt != NULL )
377 {
378 if ( (c = xmalloc(sizeof(*c))) == NULL )
379 {
380 ret = -ENOMEM;
381 put_domain(d);
382 break;
383 }
385 if ( d != current )
386 domain_pause(d);
388 arch_getdomaininfo_ctxt(d,c);
390 if ( d != current )
391 domain_unpause(d);
393 if ( copy_to_user(op->u.getdomaininfo.ctxt, c, sizeof(*c)) )
394 ret = -EINVAL;
396 if ( c != NULL )
397 xfree(c);
398 }
400 if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
401 ret = -EINVAL;
403 put_domain(d);
404 }
405 break;
407 case DOM0_GETPAGEFRAMEINFO:
408 {
409 struct pfn_info *page;
410 unsigned long pfn = op->u.getpageframeinfo.pfn;
411 domid_t dom = op->u.getpageframeinfo.domain;
412 struct domain *d;
414 ret = -EINVAL;
416 if ( unlikely(pfn >= max_page) ||
417 unlikely((d = find_domain_by_id(dom)) == NULL) )
418 break;
420 page = &frame_table[pfn];
422 if ( likely(get_page(page, d)) )
423 {
424 ret = 0;
426 op->u.getpageframeinfo.type = NOTAB;
428 if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
429 {
430 switch ( page->u.inuse.type_info & PGT_type_mask )
431 {
432 case PGT_l1_page_table:
433 op->u.getpageframeinfo.type = L1TAB;
434 break;
435 case PGT_l2_page_table:
436 op->u.getpageframeinfo.type = L2TAB;
437 break;
438 case PGT_l3_page_table:
439 op->u.getpageframeinfo.type = L3TAB;
440 break;
441 case PGT_l4_page_table:
442 op->u.getpageframeinfo.type = L4TAB;
443 break;
444 }
445 }
447 put_page(page);
448 }
450 put_domain(d);
452 copy_to_user(u_dom0_op, op, sizeof(*op));
453 }
454 break;
456 case DOM0_IOPL:
457 {
458 extern long do_iopl(domid_t, unsigned int);
459 ret = do_iopl(op->u.iopl.domain, op->u.iopl.iopl);
460 }
461 break;
463 #ifdef XEN_DEBUGGER
464 case DOM0_DEBUG:
465 {
466 pdb_do_debug(op);
467 copy_to_user(u_dom0_op, op, sizeof(*op));
468 ret = 0;
469 }
470 break;
471 #endif
473 case DOM0_SETTIME:
474 {
475 do_settime(op->u.settime.secs,
476 op->u.settime.usecs,
477 op->u.settime.system_time);
478 ret = 0;
479 }
480 break;
482 #ifdef TRACE_BUFFER
483 case DOM0_GETTBUFS:
484 {
485 ret = get_tb_info(&op->u.gettbufs);
486 copy_to_user(u_dom0_op, op, sizeof(*op));
487 }
488 break;
489 #endif
491 case DOM0_READCONSOLE:
492 {
493 ret = read_console_ring(op->u.readconsole.str,
494 op->u.readconsole.count,
495 op->u.readconsole.cmd);
496 }
497 break;
499 case DOM0_PHYSINFO:
500 {
501 extern int phys_proc_id[];
503 dom0_physinfo_t *pi = &op->u.physinfo;
505 int old_id = phys_proc_id[0];
506 int ht = 0;
508 while( ( ht < smp_num_cpus ) && ( phys_proc_id[ht] == old_id ) ) ht++;
510 pi->ht_per_core = ht;
511 pi->cores = smp_num_cpus / pi->ht_per_core;
512 pi->total_pages = max_page;
513 pi->free_pages = avail_domheap_pages();
514 pi->cpu_khz = cpu_khz;
516 copy_to_user(u_dom0_op, op, sizeof(*op));
517 ret = 0;
518 }
519 break;
521 case DOM0_PCIDEV_ACCESS:
522 {
523 extern int physdev_pci_access_modify(domid_t, int, int, int, int);
524 ret = physdev_pci_access_modify(op->u.pcidev_access.domain,
525 op->u.pcidev_access.bus,
526 op->u.pcidev_access.dev,
527 op->u.pcidev_access.func,
528 op->u.pcidev_access.enable);
529 }
530 break;
532 case DOM0_SCHED_ID:
533 {
534 op->u.sched_id.sched_id = sched_id();
535 copy_to_user(u_dom0_op, op, sizeof(*op));
536 ret = 0;
537 }
538 break;
540 case DOM0_SETDOMAININITIALMEM:
541 {
542 struct domain *d;
543 ret = -ESRCH;
544 d = find_domain_by_id(op->u.setdomaininitialmem.domain);
545 if ( d != NULL )
546 {
547 /* should only be used *before* domain is built. */
548 if ( !test_bit(DF_CONSTRUCTED, &d->flags) )
549 ret = alloc_new_dom_mem(
550 d, op->u.setdomaininitialmem.initial_memkb );
551 else
552 ret = -EINVAL;
553 put_domain(d);
554 }
555 }
556 break;
558 case DOM0_SETDOMAINMAXMEM:
559 {
560 struct domain *d;
561 ret = -ESRCH;
562 d = find_domain_by_id( op->u.setdomainmaxmem.domain );
563 if ( d != NULL )
564 {
565 d->max_pages =
566 (op->u.setdomainmaxmem.max_memkb+PAGE_SIZE-1)>> PAGE_SHIFT;
567 put_domain(d);
568 ret = 0;
569 }
570 }
571 break;
573 case DOM0_GETPAGEFRAMEINFO2:
574 {
575 #define GPF2_BATCH 128
576 int n,j;
577 int num = op->u.getpageframeinfo2.num;
578 domid_t dom = op->u.getpageframeinfo2.domain;
579 unsigned long *s_ptr = (unsigned long*) op->u.getpageframeinfo2.array;
580 struct domain *d;
581 unsigned long l_arr[GPF2_BATCH];
582 ret = -ESRCH;
584 if ( unlikely((d = find_domain_by_id(dom)) == NULL) )
585 break;
587 if ( unlikely(num > 1024) )
588 {
589 ret = -E2BIG;
590 break;
591 }
593 ret = 0;
594 for( n = 0; n < num; )
595 {
596 int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n);
598 if ( copy_from_user(l_arr, &s_ptr[n], k*sizeof(unsigned long)) )
599 {
600 ret = -EINVAL;
601 break;
602 }
604 for( j = 0; j < k; j++ )
605 {
606 struct pfn_info *page;
607 unsigned long mfn = l_arr[j];
609 if ( unlikely(mfn >= max_page) )
610 goto e2_err;
612 page = &frame_table[mfn];
614 if ( likely(get_page(page, d)) )
615 {
616 unsigned long type = 0;
618 switch( page->u.inuse.type_info & PGT_type_mask )
619 {
620 case PGT_l1_page_table:
621 type = L1TAB;
622 break;
623 case PGT_l2_page_table:
624 type = L2TAB;
625 break;
626 case PGT_l3_page_table:
627 type = L3TAB;
628 break;
629 case PGT_l4_page_table:
630 type = L4TAB;
631 break;
632 }
634 if ( page->u.inuse.type_info & PGT_pinned )
635 type |= LPINTAB;
636 l_arr[j] |= type;
637 put_page(page);
638 }
639 else
640 {
641 e2_err:
642 l_arr[j] |= XTAB;
643 }
645 }
647 if ( copy_to_user(&s_ptr[n], l_arr, k*sizeof(unsigned long)) )
648 {
649 ret = -EINVAL;
650 break;
651 }
653 n += j;
654 }
656 put_domain(d);
657 }
658 break;
660 case DOM0_SETDOMAINVMASSIST:
661 {
662 struct domain *d;
663 ret = -ESRCH;
664 d = find_domain_by_id( op->u.setdomainmaxmem.domain );
665 if ( d != NULL )
666 {
667 vm_assist(d, op->u.setdomainvmassist.cmd,
668 op->u.setdomainvmassist.type);
669 put_domain(d);
670 ret = 0;
671 }
672 }
673 break;
675 default:
676 ret = arch_do_dom0_op(op,u_dom0_op);
678 }
680 TRACE_5D(TRC_DOM0OP_LEAVE_BASE + op->cmd, ret,
681 op->u.dummy[0], op->u.dummy[1], op->u.dummy[2], op->u.dummy[3]);
684 return ret;
685 }