direct-io.hg

view xen/common/dom0_ops.c @ 3231:9035b6656818

bitkeeper revision 1.1159.187.48 (41adc6420WlNaaoUkvfgNxl44rpYYg)

Export Xen s/w perfctrs to DOM0 via new 'xenperf' utility.
author kaf24@scramble.cl.cam.ac.uk
date Wed Dec 01 13:25:22 2004 +0000 (2004-12-01)
parents b7f0cff13881
children f2e12f9f7cc8
line source
1 /******************************************************************************
2 * dom0_ops.c
3 *
4 * Process command requests from domain-0 guest OS.
5 *
6 * Copyright (c) 2002, K A Fraser
7 */
9 #include <xen/config.h>
10 #include <xen/types.h>
11 #include <xen/lib.h>
12 #include <xen/mm.h>
13 #include <public/dom0_ops.h>
14 #include <xen/sched.h>
15 #include <xen/event.h>
16 #include <asm/domain_page.h>
17 #include <asm/pdb.h>
18 #include <xen/trace.h>
19 #include <xen/console.h>
20 #include <asm/shadow.h>
21 #include <public/sched_ctl.h>
23 #define TRC_DOM0OP_ENTER_BASE 0x00020000
24 #define TRC_DOM0OP_LEAVE_BASE 0x00030000
26 extern unsigned int alloc_new_dom_mem(struct domain *, unsigned int);
27 extern long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op);
28 extern void arch_getdomaininfo_ctxt(
29 struct domain *, full_execution_context_t *);
31 static inline int is_free_domid(domid_t dom)
32 {
33 struct domain *d;
35 if ( dom >= DOMID_FIRST_RESERVED )
36 return 0;
38 if ( (d = find_domain_by_id(dom)) == NULL )
39 return 1;
41 put_domain(d);
42 return 0;
43 }
45 /*
46 * Allocate a free domain id. We try to reuse domain ids in a fairly low range,
47 * only expanding the range when there are no free domain ids. This is to keep
48 * domain ids in a range depending on the number that exist simultaneously,
49 * rather than incrementing domain ids in the full 32-bit range.
50 */
51 static int allocate_domid(domid_t *pdom)
52 {
53 static spinlock_t domid_lock = SPIN_LOCK_UNLOCKED;
54 static domid_t curdom = 0;
55 static domid_t topdom = 101;
56 int err = 0;
57 domid_t dom;
59 spin_lock(&domid_lock);
61 /* Try to use a domain id in the range 0..topdom, starting at curdom. */
62 for ( dom = curdom + 1; dom != curdom; dom++ )
63 {
64 if ( dom == topdom )
65 dom = 1;
66 if ( is_free_domid(dom) )
67 goto exit;
68 }
70 /* Couldn't find a free domain id in 0..topdom, try higher. */
71 for ( dom = topdom; dom < DOMID_FIRST_RESERVED; dom++ )
72 {
73 if ( is_free_domid(dom) )
74 {
75 topdom = dom + 1;
76 goto exit;
77 }
78 }
80 /* No free domain ids. */
81 err = -ENOMEM;
83 exit:
84 if ( err == 0 )
85 {
86 curdom = dom;
87 *pdom = dom;
88 }
90 spin_unlock(&domid_lock);
91 return err;
92 }
94 long do_dom0_op(dom0_op_t *u_dom0_op)
95 {
96 long ret = 0;
97 dom0_op_t curop, *op = &curop;
99 if ( !IS_PRIV(current) )
100 return -EPERM;
102 if ( copy_from_user(op, u_dom0_op, sizeof(*op)) )
103 return -EFAULT;
105 if ( op->interface_version != DOM0_INTERFACE_VERSION )
106 return -EACCES;
108 TRACE_5D(TRC_DOM0OP_ENTER_BASE + op->cmd,
109 0, op->u.dummy[0], op->u.dummy[1],
110 op->u.dummy[2], op->u.dummy[3] );
112 switch ( op->cmd )
113 {
115 case DOM0_BUILDDOMAIN:
116 {
117 struct domain *d = find_domain_by_id(op->u.builddomain.domain);
118 ret = -EINVAL;
119 if ( d != NULL )
120 {
121 ret = final_setup_guestos(d, &op->u.builddomain);
122 put_domain(d);
123 }
124 }
125 break;
127 case DOM0_PAUSEDOMAIN:
128 {
129 struct domain *d = find_domain_by_id(op->u.pausedomain.domain);
130 ret = -ESRCH;
131 if ( d != NULL )
132 {
133 ret = -EINVAL;
134 if ( d != current )
135 {
136 domain_pause_by_systemcontroller(d);
137 ret = 0;
138 }
139 put_domain(d);
140 }
141 }
142 break;
144 case DOM0_UNPAUSEDOMAIN:
145 {
146 struct domain *d = find_domain_by_id(op->u.unpausedomain.domain);
147 ret = -ESRCH;
148 if ( d != NULL )
149 {
150 ret = -EINVAL;
151 if ( test_bit(DF_CONSTRUCTED, &d->flags) )
152 {
153 domain_unpause_by_systemcontroller(d);
154 ret = 0;
155 }
156 put_domain(d);
157 }
158 }
159 break;
161 case DOM0_CREATEDOMAIN:
162 {
163 struct domain *d;
164 unsigned int pro = 0;
165 domid_t dom;
167 dom = op->u.createdomain.domain;
168 if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
169 {
170 ret = -EINVAL;
171 if ( !is_free_domid(dom) )
172 break;
173 }
174 else if ( (ret = allocate_domid(&dom)) != 0 )
175 break;
177 if ( op->u.createdomain.cpu == -1 )
178 {
179 /* Do an initial placement. Pick the least-populated CPU. */
180 struct domain *d;
181 unsigned int i, cnt[NR_CPUS] = { 0 };
183 read_lock(&domlist_lock);
184 for_each_domain ( d )
185 cnt[d->processor]++;
186 read_unlock(&domlist_lock);
188 for ( i = 0; i < smp_num_cpus; i++ )
189 if ( cnt[i] < cnt[pro] )
190 pro = i;
191 }
192 else
193 pro = op->u.createdomain.cpu % smp_num_cpus;
195 ret = -ENOMEM;
196 if ( (d = do_createdomain(dom, pro)) == NULL )
197 break;
199 ret = alloc_new_dom_mem(d, op->u.createdomain.memory_kb);
200 if ( ret != 0 )
201 {
202 domain_kill(d);
203 break;
204 }
206 ret = 0;
208 op->u.createdomain.domain = d->id;
209 copy_to_user(u_dom0_op, op, sizeof(*op));
210 }
211 break;
213 case DOM0_DESTROYDOMAIN:
214 {
215 struct domain *d = find_domain_by_id(op->u.destroydomain.domain);
216 ret = -ESRCH;
217 if ( d != NULL )
218 {
219 ret = -EINVAL;
220 if ( d != current )
221 {
222 domain_kill(d);
223 ret = 0;
224 }
225 put_domain(d);
226 }
227 }
228 break;
230 case DOM0_PINCPUDOMAIN:
231 {
232 domid_t dom = op->u.pincpudomain.domain;
233 struct domain *d = find_domain_by_id(dom);
234 int cpu = op->u.pincpudomain.cpu;
236 if ( d == NULL )
237 {
238 ret = -ESRCH;
239 break;
240 }
242 if ( d == current )
243 {
244 ret = -EINVAL;
245 put_domain(d);
246 break;
247 }
249 if ( cpu == -1 )
250 {
251 clear_bit(DF_CPUPINNED, &d->flags);
252 }
253 else
254 {
255 domain_pause(d);
256 synchronise_pagetables(~0UL);
257 if ( d->processor != (cpu % smp_num_cpus) )
258 set_bit(DF_MIGRATED, &d->flags);
259 set_bit(DF_CPUPINNED, &d->flags);
260 d->processor = cpu % smp_num_cpus;
261 domain_unpause(d);
262 }
264 put_domain(d);
265 }
266 break;
268 case DOM0_SCHEDCTL:
269 {
270 ret = sched_ctl(&op->u.schedctl);
271 copy_to_user(u_dom0_op, op, sizeof(*op));
272 }
273 break;
275 case DOM0_ADJUSTDOM:
276 {
277 ret = sched_adjdom(&op->u.adjustdom);
278 copy_to_user(u_dom0_op, op, sizeof(*op));
279 }
280 break;
282 case DOM0_GETMEMLIST:
283 {
284 int i;
285 struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
286 unsigned long max_pfns = op->u.getmemlist.max_pfns;
287 unsigned long pfn;
288 unsigned long *buffer = op->u.getmemlist.buffer;
289 struct list_head *list_ent;
291 ret = -EINVAL;
292 if ( d != NULL )
293 {
294 ret = 0;
296 spin_lock(&d->page_alloc_lock);
297 list_ent = d->page_list.next;
298 for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
299 {
300 pfn = list_entry(list_ent, struct pfn_info, list) -
301 frame_table;
302 if ( put_user(pfn, buffer) )
303 {
304 ret = -EFAULT;
305 break;
306 }
307 buffer++;
308 list_ent = frame_table[pfn].list.next;
309 }
310 spin_unlock(&d->page_alloc_lock);
312 op->u.getmemlist.num_pfns = i;
313 copy_to_user(u_dom0_op, op, sizeof(*op));
315 put_domain(d);
316 }
317 }
318 break;
320 case DOM0_GETDOMAININFO:
321 {
322 full_execution_context_t *c;
323 struct domain *d;
325 read_lock(&domlist_lock);
327 for_each_domain ( d )
328 {
329 if ( d->id >= op->u.getdomaininfo.domain )
330 break;
331 }
333 if ( (d == NULL) || !get_domain(d) )
334 {
335 read_unlock(&domlist_lock);
336 ret = -ESRCH;
337 break;
338 }
340 read_unlock(&domlist_lock);
342 op->u.getdomaininfo.domain = d->id;
344 op->u.getdomaininfo.flags =
345 (test_bit(DF_DYING, &d->flags) ? DOMFLAGS_DYING : 0) |
346 (test_bit(DF_CRASHED, &d->flags) ? DOMFLAGS_CRASHED : 0) |
347 (test_bit(DF_SHUTDOWN, &d->flags) ? DOMFLAGS_SHUTDOWN : 0) |
348 (test_bit(DF_CTRLPAUSE, &d->flags) ? DOMFLAGS_PAUSED : 0) |
349 (test_bit(DF_BLOCKED, &d->flags) ? DOMFLAGS_BLOCKED : 0) |
350 (test_bit(DF_RUNNING, &d->flags) ? DOMFLAGS_RUNNING : 0);
352 op->u.getdomaininfo.flags |= d->processor << DOMFLAGS_CPUSHIFT;
353 op->u.getdomaininfo.flags |=
354 d->shutdown_code << DOMFLAGS_SHUTDOWNSHIFT;
356 op->u.getdomaininfo.tot_pages = d->tot_pages;
357 op->u.getdomaininfo.max_pages = d->max_pages;
358 op->u.getdomaininfo.cpu_time = d->cpu_time;
359 op->u.getdomaininfo.shared_info_frame =
360 __pa(d->shared_info) >> PAGE_SHIFT;
362 if ( op->u.getdomaininfo.ctxt != NULL )
363 {
364 if ( (c = xmalloc(sizeof(*c))) == NULL )
365 {
366 ret = -ENOMEM;
367 put_domain(d);
368 break;
369 }
371 if ( d != current )
372 domain_pause(d);
374 arch_getdomaininfo_ctxt(d,c);
376 if ( d != current )
377 domain_unpause(d);
379 if ( copy_to_user(op->u.getdomaininfo.ctxt, c, sizeof(*c)) )
380 ret = -EINVAL;
382 if ( c != NULL )
383 xfree(c);
384 }
386 if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
387 ret = -EINVAL;
389 put_domain(d);
390 }
391 break;
393 case DOM0_GETPAGEFRAMEINFO:
394 {
395 struct pfn_info *page;
396 unsigned long pfn = op->u.getpageframeinfo.pfn;
397 domid_t dom = op->u.getpageframeinfo.domain;
398 struct domain *d;
400 ret = -EINVAL;
402 if ( unlikely(pfn >= max_page) ||
403 unlikely((d = find_domain_by_id(dom)) == NULL) )
404 break;
406 page = &frame_table[pfn];
408 if ( likely(get_page(page, d)) )
409 {
410 ret = 0;
412 op->u.getpageframeinfo.type = NOTAB;
414 if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
415 {
416 switch ( page->u.inuse.type_info & PGT_type_mask )
417 {
418 case PGT_l1_page_table:
419 op->u.getpageframeinfo.type = L1TAB;
420 break;
421 case PGT_l2_page_table:
422 op->u.getpageframeinfo.type = L2TAB;
423 break;
424 case PGT_l3_page_table:
425 op->u.getpageframeinfo.type = L3TAB;
426 break;
427 case PGT_l4_page_table:
428 op->u.getpageframeinfo.type = L4TAB;
429 break;
430 }
431 }
433 put_page(page);
434 }
436 put_domain(d);
438 copy_to_user(u_dom0_op, op, sizeof(*op));
439 }
440 break;
442 case DOM0_IOPL:
443 {
444 extern long do_iopl(domid_t, unsigned int);
445 ret = do_iopl(op->u.iopl.domain, op->u.iopl.iopl);
446 }
447 break;
449 #ifdef XEN_DEBUGGER
450 case DOM0_DEBUG:
451 {
452 pdb_do_debug(op);
453 copy_to_user(u_dom0_op, op, sizeof(*op));
454 ret = 0;
455 }
456 break;
457 #endif
459 case DOM0_SETTIME:
460 {
461 do_settime(op->u.settime.secs,
462 op->u.settime.usecs,
463 op->u.settime.system_time);
464 ret = 0;
465 }
466 break;
468 #ifdef TRACE_BUFFER
469 case DOM0_GETTBUFS:
470 {
471 ret = get_tb_info(&op->u.gettbufs);
472 copy_to_user(u_dom0_op, op, sizeof(*op));
473 }
474 break;
475 #endif
477 case DOM0_READCONSOLE:
478 {
479 ret = read_console_ring(op->u.readconsole.str,
480 op->u.readconsole.count,
481 op->u.readconsole.cmd);
482 }
483 break;
485 case DOM0_PHYSINFO:
486 {
487 dom0_physinfo_t *pi = &op->u.physinfo;
489 pi->ht_per_core = opt_noht ? 1 : ht_per_core;
490 pi->cores = smp_num_cpus / pi->ht_per_core;
491 pi->total_pages = max_page;
492 pi->free_pages = avail_domheap_pages();
493 pi->cpu_khz = cpu_khz;
495 copy_to_user(u_dom0_op, op, sizeof(*op));
496 ret = 0;
497 }
498 break;
500 case DOM0_PCIDEV_ACCESS:
501 {
502 extern int physdev_pci_access_modify(domid_t, int, int, int, int);
503 ret = physdev_pci_access_modify(op->u.pcidev_access.domain,
504 op->u.pcidev_access.bus,
505 op->u.pcidev_access.dev,
506 op->u.pcidev_access.func,
507 op->u.pcidev_access.enable);
508 }
509 break;
511 case DOM0_SCHED_ID:
512 {
513 op->u.sched_id.sched_id = sched_id();
514 copy_to_user(u_dom0_op, op, sizeof(*op));
515 ret = 0;
516 }
517 break;
519 case DOM0_SETDOMAININITIALMEM:
520 {
521 struct domain *d;
522 ret = -ESRCH;
523 d = find_domain_by_id(op->u.setdomaininitialmem.domain);
524 if ( d != NULL )
525 {
526 /* should only be used *before* domain is built. */
527 if ( !test_bit(DF_CONSTRUCTED, &d->flags) )
528 ret = alloc_new_dom_mem(
529 d, op->u.setdomaininitialmem.initial_memkb );
530 else
531 ret = -EINVAL;
532 put_domain(d);
533 }
534 }
535 break;
537 case DOM0_SETDOMAINMAXMEM:
538 {
539 struct domain *d;
540 ret = -ESRCH;
541 d = find_domain_by_id( op->u.setdomainmaxmem.domain );
542 if ( d != NULL )
543 {
544 d->max_pages =
545 (op->u.setdomainmaxmem.max_memkb+PAGE_SIZE-1)>> PAGE_SHIFT;
546 put_domain(d);
547 ret = 0;
548 }
549 }
550 break;
552 case DOM0_GETPAGEFRAMEINFO2:
553 {
554 #define GPF2_BATCH 128
555 int n,j;
556 int num = op->u.getpageframeinfo2.num;
557 domid_t dom = op->u.getpageframeinfo2.domain;
558 unsigned long *s_ptr = (unsigned long*) op->u.getpageframeinfo2.array;
559 struct domain *d;
560 unsigned long l_arr[GPF2_BATCH];
561 ret = -ESRCH;
563 if ( unlikely((d = find_domain_by_id(dom)) == NULL) )
564 break;
566 if ( unlikely(num > 1024) )
567 {
568 ret = -E2BIG;
569 break;
570 }
572 ret = 0;
573 for( n = 0; n < num; )
574 {
575 int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n);
577 if ( copy_from_user(l_arr, &s_ptr[n], k*sizeof(unsigned long)) )
578 {
579 ret = -EINVAL;
580 break;
581 }
583 for( j = 0; j < k; j++ )
584 {
585 struct pfn_info *page;
586 unsigned long mfn = l_arr[j];
588 if ( unlikely(mfn >= max_page) )
589 goto e2_err;
591 page = &frame_table[mfn];
593 if ( likely(get_page(page, d)) )
594 {
595 unsigned long type = 0;
597 switch( page->u.inuse.type_info & PGT_type_mask )
598 {
599 case PGT_l1_page_table:
600 type = L1TAB;
601 break;
602 case PGT_l2_page_table:
603 type = L2TAB;
604 break;
605 case PGT_l3_page_table:
606 type = L3TAB;
607 break;
608 case PGT_l4_page_table:
609 type = L4TAB;
610 break;
611 }
613 if ( page->u.inuse.type_info & PGT_pinned )
614 type |= LPINTAB;
615 l_arr[j] |= type;
616 put_page(page);
617 }
618 else
619 {
620 e2_err:
621 l_arr[j] |= XTAB;
622 }
624 }
626 if ( copy_to_user(&s_ptr[n], l_arr, k*sizeof(unsigned long)) )
627 {
628 ret = -EINVAL;
629 break;
630 }
632 n += j;
633 }
635 put_domain(d);
636 }
637 break;
639 case DOM0_SETDOMAINVMASSIST:
640 {
641 struct domain *d;
642 ret = -ESRCH;
643 d = find_domain_by_id( op->u.setdomainmaxmem.domain );
644 if ( d != NULL )
645 {
646 vm_assist(d, op->u.setdomainvmassist.cmd,
647 op->u.setdomainvmassist.type);
648 put_domain(d);
649 ret = 0;
650 }
651 }
652 break;
654 #ifdef PERF_COUNTERS
655 case DOM0_PERFCCONTROL:
656 {
657 extern int perfc_control(dom0_perfccontrol_t *);
658 ret = perfc_control(&op->u.perfccontrol);
659 copy_to_user(u_dom0_op, op, sizeof(*op));
660 }
661 break;
662 #endif
664 default:
665 ret = arch_do_dom0_op(op,u_dom0_op);
667 }
669 TRACE_5D(TRC_DOM0OP_LEAVE_BASE + op->cmd, ret,
670 op->u.dummy[0], op->u.dummy[1], op->u.dummy[2], op->u.dummy[3]);
673 return ret;
674 }