ia64/xen-unstable

view xen/arch/ia64/xen/dom0_ops.c @ 13434:7d8670a30445

[IA64] Change vCPU initialization to avoid domVTi privregs memory leak

1) This patch moved some processing from vcpu_initialise() and
added a new function vcpu_late_initialise().
It executes the following initializations for VCPU of
dom0/domU.
- Allocate the VHPT
- Allocate the privregs area and assign these pages into
guest pseudo physical address space.
- Set the tlbflush_timestamp.

It is executed in the following sequence.

dom0:
start_kernel()
->domain_create()
->alloc_vcpu(VCPU0)
->alloc_vcpu_struct(VCPU0)
->vcpu_initialise(VCPU0)
->vcpu_late_initialise(VCPU0)

->construct_dom0
->alloc_vcpu(other VCPUs)
->alloc_vcpu_struct(other VCPUs)
->vcpu_initialise(other VCPUs)

ia64_hypercall(FW_HYPERCALL_IPI)
->fw_hypercall_ipi(XEN_SAL_BOOT_RENDEZ_VEC)
->arch_set_info_guest(other VCPUs)
->vcpu_late_initialise(other VCPUs)

domU:
do_domctl(XEN_DOMCTL_createdomain)
->domain_create()

do_domctl(XEN_DOMCTL_max_vcpus)
->alloc_vcpu(all VCPUs)
->alloc_vcpu_struct(all VCPUs)
->vcpu_initialise(all VCPUs)

do_domctl(XEN_DOMCTL_setvcpucontext)
->set_info_guest(VCPU0)
->arch_set_info_guest(VCPU0)
->vcpu_late_initialise(VCPU0)

ia64_hypercall(FW_HYPERCALL_IPI)
->fw_hypercall_ipi(XEN_SAL_BOOT_RENDEZ_VEC)
->arch_set_info_guest(other VCPUs)
->vcpu_late_initialise(other VCPUs)


2) This patch modified the domain_set_shared_info_va().
Currently, initialization of arch.privregs->interrupt_mask_addr
of all VCPUs is executed in domain_set_shared_info_va().
However, allocation of the privregs area is deferred by the
modification in 1). Therefore, this patch moves initialization of
arch.privregs->interrupt_mask_addr to the following sequence.

dom0 and domU:
ia64_hypercall(FW_HYPERCALL_SET_SHARED_INFO_VA)
->domain_set_shared_info_va()
Initialize interrupt_mask_addr of VCPU0

ia64_hypercall(FW_HYPERCALL_IPI)
->fw_hypercall_ipi(XEN_SAL_BOOT_RENDEZ_VEC)
->arch_set_info_guest(other VCPUs)
->vcpu_late_initialise(other VCPUs)
Initialize interrupt_mask_addr of other VCPUs


Signed-off-by: Masaki Kanno <kanno.masaki@jp.fujitsu.com>
author awilliam@xenbuild2.aw
date Tue Jan 16 10:40:37 2007 -0700 (2007-01-16)
parents 0f571adbd700
children 30af6cfdb05c
line source
1 /******************************************************************************
2 * Arch-specific dom0_ops.c
3 *
4 * Process command requests from domain-0 guest OS.
5 *
6 * Copyright (c) 2002, K A Fraser
7 */
9 #include <xen/config.h>
10 #include <xen/types.h>
11 #include <xen/lib.h>
12 #include <xen/mm.h>
13 #include <public/domctl.h>
14 #include <public/sysctl.h>
15 #include <xen/sched.h>
16 #include <xen/event.h>
17 #include <asm/pdb.h>
18 #include <xen/trace.h>
19 #include <xen/console.h>
20 #include <xen/guest_access.h>
21 #include <asm/vmx.h>
22 #include <asm/dom_fw.h>
23 #include <xen/iocap.h>
24 #include <xen/errno.h>
25 #include <xen/nodemask.h>
27 #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
29 extern unsigned long total_pages;
31 long arch_do_domctl(xen_domctl_t *op, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
32 {
33 long ret = 0;
35 if ( !IS_PRIV(current->domain) )
36 return -EPERM;
38 switch ( op->cmd )
39 {
40 case XEN_DOMCTL_getmemlist:
41 {
42 unsigned long i;
43 struct domain *d = find_domain_by_id(op->domain);
44 unsigned long start_page = op->u.getmemlist.start_pfn;
45 unsigned long nr_pages = op->u.getmemlist.max_pfns;
46 unsigned long mfn;
48 if ( d == NULL ) {
49 ret = -EINVAL;
50 break;
51 }
52 for (i = 0 ; i < nr_pages ; i++) {
53 pte_t *pte;
55 pte = (pte_t *)lookup_noalloc_domain_pte(d,
56 (start_page + i) << PAGE_SHIFT);
57 if (pte && pte_present(*pte))
58 mfn = start_page + i;
59 else
60 mfn = INVALID_MFN;
62 if ( copy_to_guest_offset(op->u.getmemlist.buffer, i, &mfn, 1) ) {
63 ret = -EFAULT;
64 break;
65 }
66 }
68 op->u.getmemlist.num_pfns = i;
69 if (copy_to_guest(u_domctl, op, 1))
70 ret = -EFAULT;
72 put_domain(d);
73 }
74 break;
76 case XEN_DOMCTL_arch_setup:
77 {
78 xen_domctl_arch_setup_t *ds = &op->u.arch_setup;
79 struct domain *d = find_domain_by_id(op->domain);
81 if ( d == NULL) {
82 ret = -EINVAL;
83 break;
84 }
86 if (ds->flags & XEN_DOMAINSETUP_query) {
87 /* Set flags. */
88 if (d->arch.is_vti)
89 ds->flags |= XEN_DOMAINSETUP_hvm_guest;
90 /* Set params. */
91 ds->bp = 0; /* unknown. */
92 ds->maxmem = 0; /* unknown. */
93 ds->xsi_va = d->arch.shared_info_va;
94 ds->hypercall_imm = d->arch.breakimm;
95 /* Copy back. */
96 if ( copy_to_guest(u_domctl, op, 1) )
97 ret = -EFAULT;
98 }
99 else {
100 if (ds->flags & XEN_DOMAINSETUP_hvm_guest) {
101 if (!vmx_enabled) {
102 printk("No VMX hardware feature for vmx domain.\n");
103 ret = -EINVAL;
104 break;
105 }
106 d->arch.is_vti = 1;
107 vmx_setup_platform(d);
108 }
109 else {
110 dom_fw_setup(d, ds->bp, ds->maxmem);
111 if (ds->xsi_va)
112 d->arch.shared_info_va = ds->xsi_va;
113 if (ds->hypercall_imm) {
114 struct vcpu *v;
115 d->arch.breakimm = ds->hypercall_imm;
116 for_each_vcpu (d, v)
117 v->arch.breakimm = d->arch.breakimm;
118 }
119 {
120 /*
121 * XXX IA64_SHARED_INFO_PADDR
122 * assign these pages into guest psudo physical address
123 * space for dom0 to map this page by gmfn.
124 * this is necessary for domain build, save, restore and
125 * dump-core.
126 */
127 unsigned long i;
128 for (i = 0; i < XSI_SIZE; i += PAGE_SIZE)
129 assign_domain_page(d, IA64_SHARED_INFO_PADDR + i,
130 virt_to_maddr(d->shared_info + i));
131 }
132 }
133 }
135 put_domain(d);
136 }
137 break;
139 case XEN_DOMCTL_shadow_op:
140 {
141 struct domain *d;
142 ret = -ESRCH;
143 d = find_domain_by_id(op->domain);
144 if ( d != NULL )
145 {
146 ret = shadow_mode_control(d, &op->u.shadow_op);
147 put_domain(d);
148 copy_to_guest(u_domctl, op, 1);
149 }
150 }
151 break;
153 case XEN_DOMCTL_ioport_permission:
154 {
155 struct domain *d;
156 unsigned int fp = op->u.ioport_permission.first_port;
157 unsigned int np = op->u.ioport_permission.nr_ports;
158 unsigned int lp = fp + np - 1;
160 ret = -ESRCH;
161 d = find_domain_by_id(op->domain);
162 if (unlikely(d == NULL))
163 break;
165 if (np == 0)
166 ret = 0;
167 else {
168 if (op->u.ioport_permission.allow_access)
169 ret = ioports_permit_access(d, fp, lp);
170 else
171 ret = ioports_deny_access(d, fp, lp);
172 }
174 put_domain(d);
175 }
176 break;
177 default:
178 printk("arch_do_domctl: unrecognized domctl: %d!!!\n",op->cmd);
179 ret = -ENOSYS;
181 }
183 return ret;
184 }
186 /*
187 * Temporarily disable the NUMA PHYSINFO code until the rest of the
188 * changes are upstream.
189 */
190 #undef IA64_NUMA_PHYSINFO
/*
 * Handle ia64 arch-specific sysctl operations.  Currently only
 * XEN_SYSCTL_physinfo is implemented; the NUMA reporting paths are
 * compiled out (IA64_NUMA_PHYSINFO is #undef'd above).
 * Returns 0 on success or a negative errno value.
 */
long arch_do_sysctl(xen_sysctl_t *op, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
{
    long ret = 0;

    switch ( op->cmd )
    {
    case XEN_SYSCTL_physinfo:
    {
#ifdef IA64_NUMA_PHYSINFO
        int i;
        node_data_t *chunks;
        u64 *map, cpu_to_node_map[MAX_NUMNODES];
#endif

        xen_sysctl_physinfo_t *pi = &op->u.physinfo;

        /* Derive topology from the boot CPU's sibling/core maps. */
        pi->threads_per_core =
            cpus_weight(cpu_sibling_map[0]);
        pi->cores_per_socket =
            cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
        pi->sockets_per_node =
            num_online_cpus() / cpus_weight(cpu_core_map[0]);
#ifndef IA64_NUMA_PHYSINFO
        pi->nr_nodes         = 1;  /* pretend non-NUMA while NUMA code is disabled */
#endif
        pi->total_pages      = total_pages;
        pi->free_pages       = avail_domheap_pages();
        pi->scrub_pages      = avail_scrub_pages();
        pi->cpu_khz          = local_cpu_data->proc_freq / 1000;
        memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
        //memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
        ret = 0;

#ifdef IA64_NUMA_PHYSINFO
        /* fetch memory_chunk pointer from guest */
        get_xen_guest_handle(chunks, pi->memory_chunks);

        printk("chunks=%p, num_node_memblks=%u\n", chunks, num_node_memblks);
        /* if it is set, fill out memory chunk array */
        if (chunks != NULL) {
            if (num_node_memblks == 0) {
                /* Non-NUMA machine.  Put pseudo-values. */
                node_data_t data;
                data.node_start_pfn = 0;
                data.node_spanned_pages = total_pages;
                data.node_id = 0;
                /* copy memory chunk structs to guest */
                if (copy_to_guest_offset(pi->memory_chunks, 0, &data, 1)) {
                    ret = -EFAULT;
                    break;
                }
            } else {
                /* Export one chunk per node memory block, capped at
                   PUBLIC_MAXCHUNKS entries. */
                for (i = 0; i < num_node_memblks && i < PUBLIC_MAXCHUNKS; i++) {
                    node_data_t data;
                    data.node_start_pfn = node_memblk[i].start_paddr >>
                        PAGE_SHIFT;
                    data.node_spanned_pages = node_memblk[i].size >> PAGE_SHIFT;
                    data.node_id = node_memblk[i].nid;
                    /* copy memory chunk structs to guest */
                    if (copy_to_guest_offset(pi->memory_chunks, i, &data, 1)) {
                        ret = -EFAULT;
                        break;
                    }
                }
            }
        }
        /* set number of nodes */
        pi->nr_nodes = num_online_nodes();

        /* fetch cpu_to_node pointer from guest */
        get_xen_guest_handle(map, pi->cpu_to_node);

        /* if set, fill out cpu_to_node array */
        if (map != NULL) {
            /* copy cpu to node mapping to domU */
            memset(cpu_to_node_map, 0, sizeof(cpu_to_node_map));
            for (i = 0; i < num_online_cpus(); i++) {
                cpu_to_node_map[i] = cpu_to_node(i);
                if (copy_to_guest_offset(pi->cpu_to_node, i,
                                         &(cpu_to_node_map[i]), 1)) {
                    ret = -EFAULT;
                    break;
                }
            }
        }
#endif

        /* Copy the filled-in physinfo back to the caller. */
        if ( copy_to_guest(u_sysctl, op, 1) )
            ret = -EFAULT;
    }
    break;

    default:
        printk("arch_do_sysctl: unrecognized sysctl: %d!!!\n",op->cmd);
        ret = -ENOSYS;
    }

    return ret;
}
293 static unsigned long
294 dom0vp_ioremap(struct domain *d, unsigned long mpaddr, unsigned long size)
295 {
296 unsigned long end;
298 /* Linux may use a 0 size! */
299 if (size == 0)
300 size = PAGE_SIZE;
302 end = PAGE_ALIGN(mpaddr + size);
304 if (!iomem_access_permitted(d, mpaddr >> PAGE_SHIFT,
305 (end >> PAGE_SHIFT) - 1))
306 return -EPERM;
308 return assign_domain_mmio_page(d, mpaddr, size);
309 }
/*
 * Dispatch an IA64_DOM0VP_* hypercall issued by the current domain.
 * arg0..arg3 are interpreted per sub-command; the return-value
 * convention also varies per sub-command (pfn/mfn values, errno-style
 * negatives, INVALID_M2P_ENTRY, or -1 for unknown commands), so this
 * dispatcher must not normalize them.
 */
unsigned long
do_dom0vp_op(unsigned long cmd,
             unsigned long arg0, unsigned long arg1, unsigned long arg2,
             unsigned long arg3)
{
    unsigned long ret = 0;
    struct domain *d = current->domain;

    switch (cmd) {
    case IA64_DOM0VP_ioremap:
        /* arg0 = machine paddr, arg1 = size. */
        ret = dom0vp_ioremap(d, arg0, arg1);
        break;
    case IA64_DOM0VP_phystomach:
        /* Translate guest pfn (arg0) to machine frame number. */
        ret = ____lookup_domain_mpa(d, arg0 << PAGE_SHIFT);
        if (ret == INVALID_MFN) {
            dprintk(XENLOG_INFO, "%s: INVALID_MFN ret: 0x%lx\n",
                    __func__, ret);
        } else {
            ret = (ret & _PFN_MASK) >> PAGE_SHIFT;//XXX pte_pfn()
        }
        perfc_incrc(dom0vp_phystomach);
        break;
    case IA64_DOM0VP_machtophys:
        /* Translate machine frame (arg0) back to guest pfn. */
        if (!mfn_valid(arg0)) {
            ret = INVALID_M2P_ENTRY;
            break;
        }
        ret = get_gpfn_from_mfn(arg0);
        perfc_incrc(dom0vp_machtophys);
        break;
    case IA64_DOM0VP_zap_physmap:
        /* Remove a p2m mapping: arg0 = gpfn, arg1 = extent order. */
        ret = dom0vp_zap_physmap(d, arg0, (unsigned int)arg1);
        break;
    case IA64_DOM0VP_add_physmap:
        /* Add a p2m mapping by mfn: arg2 = flags, arg3 = target domid. */
        ret = dom0vp_add_physmap(d, arg0, arg1, (unsigned int)arg2,
                                 (domid_t)arg3);
        break;
    case IA64_DOM0VP_add_physmap_with_gmfn:
        /* As above, but the source frame is given as a gmfn. */
        ret = dom0vp_add_physmap_with_gmfn(d, arg0, arg1, (unsigned int)arg2,
                                           (domid_t)arg3);
        break;
    case IA64_DOM0VP_expose_p2m:
        /* Expose the domain's p2m table to the guest. */
        ret = dom0vp_expose_p2m(d, arg0, arg1, arg2, arg3);
        break;
    case IA64_DOM0VP_perfmon: {
        /* arg1 is a guest virtual address; wrap it in a guest handle. */
        XEN_GUEST_HANDLE(void) hnd;
        set_xen_guest_handle(hnd, (void*)arg1);
        ret = do_perfmon_op(arg0, hnd, arg2);
        break;
    }
    default:
        ret = -1;
        printk("unknown dom0_vp_op 0x%lx\n", cmd);
        break;
    }

    return ret;
}
370 /*
371 * Local variables:
372 * mode: C
373 * c-set-style: "BSD"
374 * c-basic-offset: 4
375 * tab-width: 4
376 * indent-tabs-mode: nil
377 * End:
378 */