ia64/xen-unstable

view xen/arch/powerpc/domain_build.c @ 13937:cc1eb0689bcf

[XEN][POWERPC] Allocate secondary VCPUs for Dom0

Signed-off-by: Jimi Xenidis <jimix@watson.ibm.com>
author Jimi Xenidis <jimix@watson.ibm.com>
date Sun Jan 21 07:49:50 2007 -0500 (2007-01-21)
parents bbd1c469ff5b
children 4ce0b332b572
line source
1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15 *
16 * Copyright (C) IBM Corp. 2005
17 *
18 * Authors: Jimi Xenidis <jimix@watson.ibm.com>
19 */
21 #include <xen/config.h>
22 #include <xen/lib.h>
23 #include <xen/elf.h>
24 #include <xen/sched.h>
25 #include <xen/init.h>
26 #include <xen/ctype.h>
27 #include <xen/iocap.h>
28 #include <xen/shadow.h>
29 #include <xen/domain.h>
30 #include <xen/version.h>
31 #include <asm/processor.h>
32 #include <asm/papr.h>
33 #include "oftree.h"
35 extern int parseelfimage_32(struct domain_setup_info *dsi);
36 extern int loadelfimage_32(struct domain_setup_info *dsi);
38 /* opt_dom0_mem: memory allocated to domain 0. */
39 static unsigned int dom0_nrpages;
40 static void parse_dom0_mem(char *s)
41 {
42 unsigned long long bytes;
44 bytes = parse_size_and_unit(s, NULL);
45 dom0_nrpages = bytes >> PAGE_SHIFT;
46 }
47 custom_param("dom0_mem", parse_dom0_mem);
49 static unsigned int opt_dom0_max_vcpus;
50 integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
52 static unsigned int opt_dom0_shadow;
53 boolean_param("dom0_shadow", opt_dom0_shadow);
55 int elf_sanity_check(const Elf_Ehdr *ehdr)
56 {
57 if (IS_ELF(*ehdr))
58 /* we are happy with either */
59 if ((ehdr->e_ident[EI_CLASS] == ELFCLASS32
60 && ehdr->e_machine == EM_PPC)
61 || (ehdr->e_ident[EI_CLASS] == ELFCLASS64
62 && ehdr->e_machine == EM_PPC64)) {
63 if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB
64 && ehdr->e_type == ET_EXEC)
65 return 1;
66 }
67 printk("DOM0 image is not a Xen-compatible Elf image.\n");
68 return 0;
69 }
71 /* adapted from common/elf.c */
72 #define RM_MASK(a,l) ((a) & ((1UL << (l)) - 1))
74 static int rm_loadelfimage_64(struct domain_setup_info *dsi, ulong rma)
75 {
76 char *elfbase = (char *)dsi->image_addr;
77 Elf64_Ehdr *ehdr = (Elf64_Ehdr *)dsi->image_addr;
78 Elf64_Phdr *phdr;
79 int h;
81 for (h = 0; h < ehdr->e_phnum; h++ )
82 {
83 phdr = (Elf64_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
84 if (!((phdr->p_type == PT_LOAD) &&
85 ((phdr->p_flags & (PF_W|PF_X)) != 0)))
86 continue;
88 if (phdr->p_filesz != 0)
89 memcpy((char *)(rma + RM_MASK(phdr->p_paddr, 42)),
90 elfbase + phdr->p_offset,
91 phdr->p_filesz);
92 if (phdr->p_memsz > phdr->p_filesz)
93 memset((char *)(rma + RM_MASK(phdr->p_paddr, 42) + phdr->p_filesz),
94 0, phdr->p_memsz - phdr->p_filesz);
95 }
97 #ifdef NOT_YET
98 loadelfsymtab(dsi, 1);
99 #endif
101 return 0;
102 }
104 int construct_dom0(struct domain *d,
105 unsigned long image_start, unsigned long image_len,
106 unsigned long initrd_start, unsigned long initrd_len,
107 char *cmdline)
108 {
109 int rc;
110 struct vcpu *v = d->vcpu[0];
111 struct domain_setup_info dsi;
112 ulong dst;
113 u64 *ofh_tree;
114 uint rma_nrpages = 1 << d->arch.rma_order;
115 ulong rma_sz = rma_size(d->arch.rma_order);
116 ulong rma = page_to_maddr(d->arch.rma_page);
117 start_info_t *si;
118 ulong eomem;
119 int am64 = 1;
120 int preempt = 0;
121 ulong msr;
122 ulong pc;
123 ulong r2;
124 int vcpu;
126 /* Sanity! */
127 BUG_ON(d->domain_id != 0);
128 BUG_ON(d->vcpu[0] == NULL);
130 if (image_len == 0)
131 panic("No Dom0 image supplied\n");
133 cpu_init_vcpu(v);
135 memset(&dsi, 0, sizeof(struct domain_setup_info));
136 dsi.image_addr = image_start;
137 dsi.image_len = image_len;
139 printk("Trying Dom0 as 64bit ELF\n");
140 if ((rc = parseelfimage(&dsi)) != 0) {
141 printk("Trying Dom0 as 32bit ELF\n");
142 if ((rc = parseelfimage_32(&dsi)) != 0)
143 return rc;
144 am64 = 0;
145 }
147 /* elf contains virtual addresses that can have the upper bits
148 * masked while running in real mode, so we do the masking as well
149 * as well */
150 dsi.v_kernstart = RM_MASK(dsi.v_kernstart, 42);
151 dsi.v_kernend = RM_MASK(dsi.v_kernend, 42);
152 dsi.v_kernentry = RM_MASK(dsi.v_kernentry, 42);
154 printk("*** LOADING DOMAIN 0 ***\n");
156 /* By default DOM0 is allocated all available memory. */
157 d->max_pages = ~0U;
159 /* default is the max(1/16th of memory, CONFIG_MIN_DOM0_PAGES) */
160 if (dom0_nrpages == 0) {
161 dom0_nrpages = total_pages >> 4;
163 if (dom0_nrpages < CONFIG_MIN_DOM0_PAGES)
164 dom0_nrpages = CONFIG_MIN_DOM0_PAGES;
165 }
167 /* make sure we are at least as big as the RMA */
168 if (dom0_nrpages > rma_nrpages)
169 dom0_nrpages = allocate_extents(d, dom0_nrpages, rma_nrpages);
171 ASSERT(d->tot_pages == dom0_nrpages);
172 ASSERT(d->tot_pages >= rma_nrpages);
174 if (opt_dom0_shadow == 0) {
175 /* 1/64 of memory */
176 opt_dom0_shadow = (d->tot_pages >> 6) >> (20 - PAGE_SHIFT);
177 }
179 do {
180 shadow_set_allocation(d, opt_dom0_shadow, &preempt);
181 } while (preempt);
182 if (shadow_get_allocation(d) == 0)
183 panic("shadow allocation failed: %dMib\n", opt_dom0_shadow);
185 ASSERT( image_len < rma_sz );
187 si = (start_info_t *)(rma_addr(&d->arch, RMA_START_INFO) + rma);
188 printk("xen_start_info: %p\n", si);
190 sprintf(si->magic, "xen-%i.%i-powerpc%d%s",
191 xen_major_version(), xen_minor_version(), BITS_PER_LONG, "HV");
192 si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
194 si->shared_info = ((ulong)d->shared_info) - rma;
195 printk("shared_info: 0x%lx,%p\n", si->shared_info, d->shared_info);
197 eomem = si->shared_info;
199 /* number of pages accessible */
200 si->nr_pages = rma_sz >> PAGE_SHIFT;
202 si->pt_base = 0;
203 si->nr_pt_frames = 0;
204 si->mfn_list = 0;
206 /* OF usually sits here:
207 * - Linux needs it to be loaded before the vmlinux or initrd
208 * - AIX demands it to be @ 32M.
209 */
210 dst = (32 << 20);
212 /* put stack below everything */
213 v->arch.ctxt.gprs[1] = dst - STACK_FRAME_OVERHEAD;
215 /* startup secondary processors */
216 if ( opt_dom0_max_vcpus == 0 )
217 opt_dom0_max_vcpus = num_online_cpus();
218 if ( opt_dom0_max_vcpus > num_online_cpus() )
219 opt_dom0_max_vcpus = num_online_cpus();
220 if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS )
221 opt_dom0_max_vcpus = MAX_VIRT_CPUS;
222 #ifdef BITS_PER_GUEST_LONG
223 if ( opt_dom0_max_vcpus > BITS_PER_GUEST_LONG(d) )
224 opt_dom0_max_vcpus = BITS_PER_GUEST_LONG(d);
225 #endif
226 printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus);
228 for (vcpu = 1; vcpu < opt_dom0_max_vcpus; vcpu++) {
229 if (NULL == alloc_vcpu(dom0, vcpu, vcpu))
230 panic("Error creating domain 0 vcpu %d\n", vcpu);
231 /* for now we pin Dom0 VCPUs to their coresponding CPUs */
232 if (cpu_isset(vcpu, cpu_online_map))
233 dom0->vcpu[vcpu]->cpu_affinity = cpumask_of_cpu(vcpu);
234 }
236 /* copy relative to Xen */
237 dst += rma;
239 ASSERT((dst - rma) + (ulong)firmware_image_size < eomem);
240 printk("loading OFH: 0x%lx, RMA: 0x%lx\n", dst, dst - rma);
241 memcpy((void *)dst, firmware_image_start, (ulong)firmware_image_size);
243 v->arch.ctxt.gprs[5] = (dst - rma);
244 ofh_tree = (u64 *)(dst + 0x10);
245 ASSERT(*ofh_tree == 0xdeadbeef00000000);
247 /* accomodate for a modest bss section */
248 dst = ALIGN_UP(dst + (ulong)firmware_image_size + PAGE_SIZE, PAGE_SIZE);
249 ASSERT((dst - rma) + oftree_len < eomem);
251 *ofh_tree = dst - rma;
252 printk("loading OFD: 0x%lx RMA: 0x%lx, 0x%lx\n", dst, dst - rma,
253 oftree_len);
254 memcpy((void *)dst, (void *)oftree, oftree_len);
256 dst = ALIGN_UP(dst + oftree_len, PAGE_SIZE);
258 if (am64) {
259 ulong kbase;
260 ulong *fdesc;
262 printk("loading 64-bit Dom0: 0x%lx, in RMA:0x%lx\n", dst, dst - rma);
263 rm_loadelfimage_64(&dsi, dst);
265 kbase = dst;
266 /* move dst to end of bss */
267 dst = ALIGN_UP(dsi.v_kernend + dst, PAGE_SIZE);
269 if ( initrd_len > 0 ) {
270 ASSERT( (dst - rma) + image_len < eomem );
272 printk("loading initrd: 0x%lx, 0x%lx\n", dst, initrd_len);
273 memcpy((void *)dst, (void *)initrd_start, initrd_len);
275 si->mod_start = dst - rma;
276 si->mod_len = image_len;
278 dst = ALIGN_UP(dst + initrd_len, PAGE_SIZE);
279 } else {
280 printk("no initrd\n");
281 si->mod_start = 0;
282 si->mod_len = 0;
283 }
284 /* it may be a function descriptor */
285 fdesc = (ulong *)(dsi.v_kernstart + dsi.v_kernentry + kbase);
287 if (fdesc[2] == 0
288 && ((fdesc[0] >= dsi.v_kernstart)
289 && (fdesc[0] < dsi.v_kernend)) /* text entry is in range */
290 && ((fdesc[1] >= dsi.v_kernstart) /* toc can be > image */
291 && (fdesc[1] < (dsi.v_kernend + (0x7fff * sizeof (ulong)))))) {
292 /* it is almost certainly a function descriptor */
293 pc = RM_MASK(fdesc[0], 42) + kbase - rma;
294 r2 = RM_MASK(fdesc[1], 42) + kbase - rma;
295 } else {
296 pc = ((ulong)fdesc) - rma;
297 r2 = 0;
298 }
299 msr = MSR_SF;
300 } else {
301 printk("loading 32-bit Dom0: 0x%lx, in RMA:0x%lx\n",
302 dsi.v_kernstart + rma, dsi.v_kernstart);
303 dsi.v_start = rma;
304 loadelfimage_32(&dsi);
306 pc = dsi.v_kernentry;
307 r2 = 0;
308 msr = 0;
309 }
311 v->arch.ctxt.gprs[3] = si->mod_start;
312 v->arch.ctxt.gprs[4] = si->mod_len;
314 memset(si->cmd_line, 0, sizeof(si->cmd_line));
315 if ( cmdline != NULL )
316 strncpy((char *)si->cmd_line, cmdline, sizeof(si->cmd_line)-1);
318 v->arch.ctxt.msr = msr;
319 v->arch.ctxt.pc = pc;
320 v->arch.ctxt.gprs[2] = r2;
322 printk("DOM: pc = 0x%lx, r2 = 0x%lx\n", pc, r2);
324 ofd_dom0_fixup(d, *ofh_tree + rma, si);
326 set_bit(_VCPUF_initialised, &v->vcpu_flags);
328 rc = 0;
330 /* DOM0 is permitted full I/O capabilities. */
331 rc |= iomem_permit_access(dom0, 0UL, ~0UL);
332 rc |= irqs_permit_access(dom0, 0, NR_IRQS-1);
334 BUG_ON(rc != 0);
336 return 0;
337 }