direct-io.hg

view xen/arch/x86/dom0_ops.c @ 8686:c0a0f4db5ab1

Create a block of reserved PFNs in shadow translate mode guests, and
move the shared info and grant table pfns into that block. This
allows us to remove the get_gnttablist dom0 op, and simplifies the
domain creation code slightly. Having the reserved block managed by
Xen may also make it slightly easier to handle the case where the
grant table needs to be extended at run time.

Suggested-by: kaf24
Signed-off-by: Steven Smith, sos22@cam.ac.uk
author sos22@douglas.cl.cam.ac.uk
date Thu Jan 26 19:40:13 2006 +0100 (2006-01-26)
parents cc9bb3e0e348
children 5a9efc35feb2
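
The change described above boils down to giving a shadow-translate guest a
small, Xen-managed block of reserved PFNs with a fixed internal layout, so the
shared-info and grant-table frames can be located by offset instead of being
queried through a dedicated dom0 op (the removed get_gnttablist). A minimal
sketch of that layout idea follows; the names, slot order and block size are
invented purely for illustration and are not the interface added by this
changeset:

#define RESERVED_BLOCK_NR_FRAMES 8      /* invented size, illustration only */

enum reserved_block_slot {
    RESERVED_SLOT_SHARED_INFO = 0,      /* the domain's shared_info page    */
    RESERVED_SLOT_GRANT_TABLE = 1,      /* first grant-table frame; further
                                         * frames follow if the table grows */
};

/* Slots are consecutive PFNs inside the reserved block. */
static inline unsigned long
reserved_block_pfn(unsigned long block_base_pfn, enum reserved_block_slot slot)
{
    return block_base_pfn + (unsigned long)slot;
}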
line source
/******************************************************************************
 * Arch-specific dom0_ops.c
 *
 * Process command requests from domain-0 guest OS.
 *
 * Copyright (c) 2002, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <public/dom0_ops.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/domain_page.h>
#include <asm/msr.h>
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
#include <asm/shadow.h>
#include <asm/irq.h>
#include <asm/processor.h>
#include <public/sched_ctl.h>

#include <asm/mtrr.h>
#include "mtrr/mtrr.h"

#define TRC_DOM0OP_ENTER_BASE 0x00020000
#define TRC_DOM0OP_LEAVE_BASE 0x00030000

static int msr_cpu_mask;
static unsigned long msr_addr;
static unsigned long msr_lo;
static unsigned long msr_hi;
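
/*
 * DOM0_MSR below stashes its parameters in the statics above, then uses
 * smp_call_function() to run these helpers on the other online CPUs and
 * calls them once locally, so each online CPU selected by msr_cpu_mask
 * performs the requested rdmsr/wrmsr.
 */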
static void write_msr_for(void *unused)
{
    if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
        (void)wrmsr_user(msr_addr, msr_lo, msr_hi);
}

static void read_msr_for(void *unused)
{
    if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
        (void)rdmsr_user(msr_addr, msr_lo, msr_hi);
}

long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
{
    long ret = 0;

    switch ( op->cmd )
    {

    case DOM0_MSR:
    {
        if ( op->u.msr.write )
        {
            msr_cpu_mask = op->u.msr.cpu_mask;
            msr_addr = op->u.msr.msr;
            msr_lo = op->u.msr.in1;
            msr_hi = op->u.msr.in2;
            smp_call_function(write_msr_for, NULL, 1, 1);
            write_msr_for(NULL);
        }
        else
        {
            msr_cpu_mask = op->u.msr.cpu_mask;
            msr_addr = op->u.msr.msr;
            smp_call_function(read_msr_for, NULL, 1, 1);
            read_msr_for(NULL);

            op->u.msr.out1 = msr_lo;
            op->u.msr.out2 = msr_hi;
            copy_to_user(u_dom0_op, op, sizeof(*op));
        }
        ret = 0;
    }
    break;

    case DOM0_SHADOW_CONTROL:
    {
        struct domain *d;
        ret = -ESRCH;
        d = find_domain_by_id(op->u.shadow_control.domain);
        if ( d != NULL )
        {
            ret = shadow_mode_control(d, &op->u.shadow_control);
            put_domain(d);
            copy_to_user(u_dom0_op, op, sizeof(*op));
        }
    }
    break;
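
    /*
     * The three MEMTYPE ops below manage the variable-range MTRRs through
     * the mtrr/ code: ADD returns the newly allocated register index via
     * the reg field (with a zero handle), DEL frees a register by index,
     * and READ reports the (pfn, nr_pfns, type) of an existing register.
     */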
    case DOM0_ADD_MEMTYPE:
    {
        ret = mtrr_add_page(
            op->u.add_memtype.pfn,
            op->u.add_memtype.nr_pfns,
            op->u.add_memtype.type,
            1);
        if (ret > 0)
        {
            (void)__put_user(0, &u_dom0_op->u.add_memtype.handle);
            (void)__put_user(ret, &u_dom0_op->u.add_memtype.reg);
            ret = 0;
        }
    }
    break;

    case DOM0_DEL_MEMTYPE:
    {
        if (op->u.del_memtype.handle == 0
            /* mtrr/main.c otherwise does a lookup */
            && (int)op->u.del_memtype.reg >= 0)
        {
            ret = mtrr_del_page(op->u.del_memtype.reg, 0, 0);
            if (ret > 0)
                ret = 0;
        }
        else
            ret = -EINVAL;
    }
    break;

    case DOM0_READ_MEMTYPE:
    {
        unsigned long pfn;
        unsigned int nr_pfns;
        mtrr_type type;

        ret = -EINVAL;
        if ( op->u.read_memtype.reg < num_var_ranges )
        {
            mtrr_if->get(op->u.read_memtype.reg, &pfn, &nr_pfns, &type);
            (void)__put_user(pfn, &u_dom0_op->u.read_memtype.pfn);
            (void)__put_user(nr_pfns, &u_dom0_op->u.read_memtype.nr_pfns);
            (void)__put_user(type, &u_dom0_op->u.read_memtype.type);
            ret = 0;
        }
    }
    break;

    case DOM0_MICROCODE:
    {
        extern int microcode_update(void *buf, unsigned long len);
        ret = microcode_update(op->u.microcode.data, op->u.microcode.length);
    }
    break;

    case DOM0_IOPORT_PERMISSION:
    {
        struct domain *d;
        unsigned int fp = op->u.ioport_permission.first_port;
        unsigned int np = op->u.ioport_permission.nr_ports;

        ret = -EINVAL;
        if ( (fp + np) > 65536 )
            break;

        ret = -ESRCH;
        if ( unlikely((d = find_domain_by_id(
            op->u.ioport_permission.domain)) == NULL) )
            break;

        if ( np == 0 )
            ret = 0;
        else if ( op->u.ioport_permission.allow_access )
            ret = ioports_permit_access(d, fp, fp + np - 1);
        else
            ret = ioports_deny_access(d, fp, fp + np - 1);

        put_domain(d);
    }
    break;
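
    /*
     * DOM0_PHYSINFO: report platform topology (threads per core, cores per
     * socket, sockets per node), page counts, CPU frequency and the raw
     * CPUID capability words.
     */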
    case DOM0_PHYSINFO:
    {
        dom0_physinfo_t *pi = &op->u.physinfo;

        pi->threads_per_core = smp_num_siblings;
        pi->cores_per_socket = boot_cpu_data.x86_num_cores;
        pi->sockets_per_node =
            num_online_cpus() / (pi->threads_per_core * pi->cores_per_socket);
        pi->nr_nodes = 1;
        pi->total_pages = total_pages;
        pi->free_pages = avail_domheap_pages();
        pi->cpu_khz = cpu_khz;
        memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
        memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
        ret = 0;
        if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
            ret = -EFAULT;
    }
    break;
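
    /*
     * DOM0_GETPAGEFRAMEINFO: classify a single frame belonging to the target
     * domain as a pagetable of some level (L1TAB..L4TAB) or as NOTAB.
     */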
    case DOM0_GETPAGEFRAMEINFO:
    {
        struct pfn_info *page;
        unsigned long pfn = op->u.getpageframeinfo.pfn;
        domid_t dom = op->u.getpageframeinfo.domain;
        struct domain *d;

        ret = -EINVAL;

        if ( unlikely(pfn >= max_page) ||
             unlikely((d = find_domain_by_id(dom)) == NULL) )
            break;

        page = pfn_to_page(pfn);

        if ( likely(get_page(page, d)) )
        {
            ret = 0;

            op->u.getpageframeinfo.type = NOTAB;

            if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
            {
                switch ( page->u.inuse.type_info & PGT_type_mask )
                {
                case PGT_l1_page_table:
                    op->u.getpageframeinfo.type = L1TAB;
                    break;
                case PGT_l2_page_table:
                    op->u.getpageframeinfo.type = L2TAB;
                    break;
                case PGT_l3_page_table:
                    op->u.getpageframeinfo.type = L3TAB;
                    break;
                case PGT_l4_page_table:
                    op->u.getpageframeinfo.type = L4TAB;
                    break;
                }
            }

            put_page(page);
        }

        put_domain(d);

        copy_to_user(u_dom0_op, op, sizeof(*op));
    }
    break;
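
    /*
     * DOM0_GETPAGEFRAMEINFO2: the batched version of the above.  The caller's
     * MFN array is bounced through a single xenheap page, GPF2_BATCH entries
     * at a time, and each entry has its frame type (plus LPINTAB/XTAB flags)
     * OR'd into it in place.
     */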
    case DOM0_GETPAGEFRAMEINFO2:
    {
#define GPF2_BATCH (PAGE_SIZE / sizeof(unsigned long))
        int n,j;
        int num = op->u.getpageframeinfo2.num;
        domid_t dom = op->u.getpageframeinfo2.domain;
        unsigned long *s_ptr = (unsigned long*) op->u.getpageframeinfo2.array;
        struct domain *d;
        unsigned long *l_arr;
        ret = -ESRCH;

        if ( unlikely((d = find_domain_by_id(dom)) == NULL) )
            break;

        if ( unlikely(num > 1024) )
        {
            ret = -E2BIG;
            put_domain(d);
            break;
        }

        l_arr = alloc_xenheap_page();

        ret = 0;
        for( n = 0; n < num; )
        {
            int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n);

            if ( copy_from_user(l_arr, &s_ptr[n], k*sizeof(unsigned long)) )
            {
                ret = -EINVAL;
                break;
            }

            for( j = 0; j < k; j++ )
            {
                struct pfn_info *page;
                unsigned long mfn = l_arr[j];

                page = pfn_to_page(mfn);

                if ( likely(pfn_valid(mfn) && get_page(page, d)) )
                {
                    unsigned long type = 0;

                    switch( page->u.inuse.type_info & PGT_type_mask )
                    {
                    case PGT_l1_page_table:
                        type = L1TAB;
                        break;
                    case PGT_l2_page_table:
                        type = L2TAB;
                        break;
                    case PGT_l3_page_table:
                        type = L3TAB;
                        break;
                    case PGT_l4_page_table:
                        type = L4TAB;
                        break;
                    }

                    if ( page->u.inuse.type_info & PGT_pinned )
                        type |= LPINTAB;
                    l_arr[j] |= type;
                    put_page(page);
                }
                else
                    l_arr[j] |= XTAB;

            }

            if ( copy_to_user(&s_ptr[n], l_arr, k*sizeof(unsigned long)) )
            {
                ret = -EINVAL;
                break;
            }

            n += k;
        }

        free_xenheap_page(l_arr);

        put_domain(d);
    }
    break;
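
    /*
     * DOM0_GETMEMLIST: walk the domain's page list under page_alloc_lock and
     * copy up to max_pfns frame numbers into the caller's buffer, reporting
     * how many entries were actually written.
     */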
    case DOM0_GETMEMLIST:
    {
        int i;
        struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
        unsigned long max_pfns = op->u.getmemlist.max_pfns;
        unsigned long pfn;
        unsigned long *buffer = op->u.getmemlist.buffer;
        struct list_head *list_ent;

        ret = -EINVAL;
        if ( d != NULL )
        {
            ret = 0;

            spin_lock(&d->page_alloc_lock);
            list_ent = d->page_list.next;
            for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
            {
                pfn = page_to_pfn(list_entry(list_ent, struct pfn_info, list));
                if ( put_user(pfn, buffer) )
                {
                    ret = -EFAULT;
                    break;
                }
                buffer++;
                list_ent = pfn_to_page(pfn)->list.next;
            }
            spin_unlock(&d->page_alloc_lock);

            op->u.getmemlist.num_pfns = i;
            copy_to_user(u_dom0_op, op, sizeof(*op));

            put_domain(d);
        }
    }
    break;

    case DOM0_PLATFORM_QUIRK:
    {
        extern int opt_noirqbalance;
        switch ( op->u.platform_quirk.quirk_id )
        {
        case QUIRK_NOIRQBALANCING:
            printk("Platform quirk -- Disabling IRQ balancing/affinity.\n");
            opt_noirqbalance = 1;
            setup_ioapic_dest();
            break;
        default:
            ret = -EINVAL;
            break;
        }
    }
    break;
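
    /*
     * DOM0_PHYSICAL_MEMORY_MAP: copy out the E820 memory map, truncated to
     * the caller's max_map_entries, flagging each range as RAM or not.
     */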
    case DOM0_PHYSICAL_MEMORY_MAP:
    {
        struct dom0_memory_map_entry entry;
        int i;

        for ( i = 0; i < e820.nr_map; i++ )
        {
            if ( i >= op->u.physical_memory_map.max_map_entries )
                break;
            entry.start = e820.map[i].addr;
            entry.end = e820.map[i].addr + e820.map[i].size;
            entry.is_ram = (e820.map[i].type == E820_RAM);
            (void)copy_to_user(
                &op->u.physical_memory_map.memory_map[i],
                &entry, sizeof(entry));
        }

        op->u.physical_memory_map.nr_map_entries = i;
        (void)copy_to_user(u_dom0_op, op, sizeof(*op));
    }
    break;
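
    /*
     * DOM0_HYPERCALL_INIT: take a temporary writable type reference on the
     * nominated frame, map it, and initialise it as a hypercall page for the
     * target domain.
     */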
    case DOM0_HYPERCALL_INIT:
    {
        struct domain *d;
        unsigned long mfn = op->u.hypercall_init.mfn;
        void *hypercall_page;

        ret = -ESRCH;
        if ( unlikely((d = find_domain_by_id(
            op->u.hypercall_init.domain)) == NULL) )
            break;

        ret = -EACCES;
        if ( !pfn_valid(mfn) ||
             !get_page_and_type(pfn_to_page(mfn), d, PGT_writable_page) )
        {
            put_domain(d);
            break;
        }

        ret = 0;

        hypercall_page = map_domain_page(mfn);
        hypercall_page_initialise(hypercall_page);
        unmap_domain_page(hypercall_page);

        put_page_and_type(pfn_to_page(mfn));

        put_domain(d);
    }
    break;

    default:
        ret = -ENOSYS;
        break;
    }

    return ret;
}
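
/*
 * Snapshot a vcpu's register state for the tools: VMX guests have their user
 * registers and control registers read back from the VMCS, while paravirtual
 * guests get the virtualised IOPL merged back into the returned eflags.
 */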
void arch_getdomaininfo_ctxt(
    struct vcpu *v, struct vcpu_guest_context *c)
{
    extern void save_vmx_cpu_user_regs(struct cpu_user_regs *);

    memcpy(c, &v->arch.guest_context, sizeof(*c));

    if ( VMX_DOMAIN(v) )
    {
        save_vmx_cpu_user_regs(&c->user_regs);
        __vmread(CR0_READ_SHADOW, &c->ctrlreg[0]);
        __vmread(CR4_READ_SHADOW, &c->ctrlreg[4]);
    }
    else
    {
        /* IOPL privileges are virtualised: merge back into returned eflags. */
        BUG_ON((c->user_regs.eflags & EF_IOPL) != 0);
        c->user_regs.eflags |= v->arch.iopl << 12;
    }

    c->flags = 0;
    if ( test_bit(_VCPUF_fpu_initialised, &v->vcpu_flags) )
        c->flags |= VGCF_I387_VALID;
    if ( KERNEL_MODE(v, &v->arch.guest_context.user_regs) )
        c->flags |= VGCF_IN_KERNEL;
    if (VMX_DOMAIN(v))
        c->flags |= VGCF_VMX_GUEST;

    c->ctrlreg[3] = pagetable_get_paddr(v->arch.guest_table);

    c->vm_assist = v->domain->vm_assist;
}