ia64/xen-unstable: xen/common/kexec.c @ 18594:5e4e234d58be

x86: Define __per_cpu_shift label to help kdump/crashdump.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

author   Keir Fraser <keir.fraser@citrix.com>
date     Wed Oct 08 13:11:06 2008 +0100
parents  8a0415fac759
children 845aa241e163
/******************************************************************************
 * kexec.c - Architecture-independent kexec code for Xen
 *
 * Xen port written by:
 * - Simon 'Horms' Horman <horms@verge.net.au>
 * - Magnus Damm <magnus@valinux.co.jp>
 */
#include <xen/lib.h>
#include <xen/ctype.h>
#include <xen/errno.h>
#include <xen/guest_access.h>
#include <xen/sched.h>
#include <xen/types.h>
#include <xen/kexec.h>
#include <xen/keyhandler.h>
#include <public/kexec.h>
#include <xen/cpumask.h>
#include <asm/atomic.h>
#include <xen/spinlock.h>
#include <xen/version.h>
#include <xen/console.h>
#include <public/elfnote.h>
#include <xsm/xsm.h>
#ifdef CONFIG_COMPAT
#include <compat/kexec.h>
#endif
/* Per-cpu buffers for the crash notes, allocated on first use. */
static DEFINE_PER_CPU(void *, crash_notes);

/* The single system-wide Xen info note, carried in CPU0's buffer. */
static Elf_Note *xen_crash_note;

/* CPUs whose state has already been saved on the crash path. */
static cpumask_t crash_saved_cpus;

static xen_kexec_image_t kexec_image[KEXEC_IMAGE_NR];

#define KEXEC_FLAG_DEFAULT_POS  (KEXEC_IMAGE_NR + 0)
#define KEXEC_FLAG_CRASH_POS    (KEXEC_IMAGE_NR + 1)
#define KEXEC_FLAG_IN_PROGRESS  (KEXEC_IMAGE_NR + 2)

static unsigned long kexec_flags = 0; /* the lowest bits are for KEXEC_IMAGE... */

static spinlock_t kexec_lock = SPIN_LOCK_UNLOCKED;

static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
static size_t vmcoreinfo_size = 0;

xen_kexec_reserve_t kexec_crash_area;
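
/*
 * Parse the "crashkernel=<size>[@<offset>]" boot option, which reserves a
 * region of machine memory for the crash kernel. For example (illustrative
 * values), "crashkernel=64M@32M" reserves 64MiB at machine address 32MiB.
 */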
static void __init parse_crashkernel(const char *str)
{
    kexec_crash_area.size = parse_size_and_unit(str, &str);
    if ( *str == '@' )
        kexec_crash_area.start = parse_size_and_unit(str+1, NULL);
}
custom_param("crashkernel", parse_crashkernel);

static void one_cpu_only(void)
{
    /* Only allow the first cpu to continue - force other cpus to spin. */
    if ( test_and_set_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
        for ( ; ; ) ;
}
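
/*
 * Layout of each per-cpu crash note buffer, as allocated by
 * kexec_get_cpu() below:
 *
 *   +-------------------------------------------------+
 *   | "CORE" note: NT_PRSTATUS (register state)       |
 *   | "Xen"  note: XEN_ELFNOTE_CRASH_REGS             |
 *   | "Xen"  note: XEN_ELFNOTE_CRASH_INFO (CPU0 only) |
 *   +-------------------------------------------------+
 */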

/* Save the registers in the per-cpu crash note buffer. */
void kexec_crash_save_cpu(void)
{
    int cpu = smp_processor_id();
    Elf_Note *note = per_cpu(crash_notes, cpu);
    ELF_Prstatus *prstatus;
    crash_xen_core_t *xencore;

    if ( cpu_test_and_set(cpu, crash_saved_cpus) )
        return;

    prstatus = (ELF_Prstatus *)ELFNOTE_DESC(note);

    note = ELFNOTE_NEXT(note);
    xencore = (crash_xen_core_t *)ELFNOTE_DESC(note);

    elf_core_save_regs(&prstatus->pr_reg, xencore);
}

/* Set up the single Xen-specific-info crash note. */
crash_xen_info_t *kexec_crash_save_info(void)
{
    int cpu = smp_processor_id();
    crash_xen_info_t info;
    crash_xen_info_t *out = (crash_xen_info_t *)ELFNOTE_DESC(xen_crash_note);

    BUG_ON(!cpu_test_and_set(cpu, crash_saved_cpus));

    memset(&info, 0, sizeof(info));
    info.xen_major_version = xen_major_version();
    info.xen_minor_version = xen_minor_version();
    /* String fields are stored as machine addresses for the dump analyser. */
    info.xen_extra_version = __pa(xen_extra_version());
    info.xen_changeset = __pa(xen_changeset());
    info.xen_compiler = __pa(xen_compiler());
    info.xen_compile_date = __pa(xen_compile_date());
    info.xen_compile_time = __pa(xen_compile_time());
    info.tainted = tainted;

    /* Copy from guaranteed-aligned local copy to possibly-unaligned dest. */
    memcpy(out, &info, sizeof(info));

    return out;
}

void kexec_crash(void)
{
    int pos;

    pos = (test_bit(KEXEC_FLAG_CRASH_POS, &kexec_flags) != 0);
    if ( !test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) )
        return;

    console_start_sync();

    one_cpu_only();
    kexec_crash_save_cpu();
    machine_crash_shutdown();

    machine_kexec(&kexec_image[KEXEC_IMAGE_CRASH_BASE + pos]);

    BUG();
}

static void do_crashdump_trigger(unsigned char key)
{
    printk("'%c' pressed -> triggering crashdump\n", key);
    kexec_crash();
    printk(" * no crash kernel loaded!\n");
}

static __init int register_crashdump_trigger(void)
{
    register_keyhandler('C', do_crashdump_trigger, "trigger a crashdump");
    return 0;
}
__initcall(register_crashdump_trigger);
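
/*
 * An ELF note consists of a fixed header followed by the name and the
 * descriptor, each padded up to a 4-byte boundary:
 *
 *   +------------------------------------+
 *   | Elf_Note { namesz, descsz, type }  |
 *   | name (namesz bytes + padding)      |
 *   | desc (descsz bytes + padding)      |
 *   +------------------------------------+
 *
 * setup_note() fills in the header and name; sizeof_note() returns the
 * total size of such a note, accounting for both paddings.
 */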

static void setup_note(Elf_Note *n, const char *name, int type, int descsz)
{
    int l = strlen(name) + 1;
    strlcpy(ELFNOTE_NAME(n), name, l);
    n->namesz = l;
    n->descsz = descsz;
    n->type = type;
}

static int sizeof_note(const char *name, int descsz)
{
    return (sizeof(Elf_Note) +
            ELFNOTE_ALIGN(strlen(name)+1) +
            ELFNOTE_ALIGN(descsz));
}

static int kexec_get_reserve(xen_kexec_range_t *range)
{
    if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0 )
    {
        range->start = kexec_crash_area.start;
        range->size = kexec_crash_area.size;
    }
    else
        range->start = range->size = 0;
    return 0;
}

static int kexec_get_cpu(xen_kexec_range_t *range)
{
    int nr = range->nr;
    int nr_bytes = 0;

    if ( nr < 0 || nr >= num_present_cpus() )
        return -EINVAL;

    nr_bytes += sizeof_note("CORE", sizeof(ELF_Prstatus));
    nr_bytes += sizeof_note("Xen", sizeof(crash_xen_core_t));

    /* The Xen info note is included in CPU0's range. */
    if ( nr == 0 )
        nr_bytes += sizeof_note("Xen", sizeof(crash_xen_info_t));

    if ( per_cpu(crash_notes, nr) == NULL )
    {
        Elf_Note *note;

        note = per_cpu(crash_notes, nr) = xmalloc_bytes(nr_bytes);

        if ( note == NULL )
            return -ENOMEM;

        /* Set up CORE note. */
        setup_note(note, "CORE", NT_PRSTATUS, sizeof(ELF_Prstatus));

        /* Set up Xen CORE note. */
        note = ELFNOTE_NEXT(note);
        setup_note(note, "Xen", XEN_ELFNOTE_CRASH_REGS, sizeof(crash_xen_core_t));

        if ( nr == 0 )
        {
            /* Set up system-wide Xen info note. */
            xen_crash_note = note = ELFNOTE_NEXT(note);
            setup_note(note, "Xen", XEN_ELFNOTE_CRASH_INFO, sizeof(crash_xen_info_t));
        }
    }

    range->start = __pa((unsigned long)per_cpu(crash_notes, nr));
    range->size = nr_bytes;
    return 0;
}

static int kexec_get_vmcoreinfo(xen_kexec_range_t *range)
{
    range->start = __pa((unsigned long)vmcoreinfo_data);
    range->size = VMCOREINFO_BYTES;
    return 0;
}
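
/*
 * Resolve a range query from the guest: the machine address and size of
 * the crash reservation, a CPU's crash note buffer, the VMCOREINFO note,
 * or an architecture-specific range.
 */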
static int kexec_get_range_internal(xen_kexec_range_t *range)
{
    int ret = -EINVAL;

    switch ( range->range )
    {
    case KEXEC_RANGE_MA_CRASH:
        ret = kexec_get_reserve(range);
        break;
    case KEXEC_RANGE_MA_CPU:
        ret = kexec_get_cpu(range);
        break;
    case KEXEC_RANGE_MA_VMCOREINFO:
        ret = kexec_get_vmcoreinfo(range);
        break;
    default:
        ret = machine_kexec_get(range);
        break;
    }

    return ret;
}

static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg)
{
    xen_kexec_range_t range;
    int ret = -EINVAL;

    if ( unlikely(copy_from_guest(&range, uarg, 1)) )
        return -EFAULT;

    ret = kexec_get_range_internal(&range);

    if ( ret == 0 && unlikely(copy_to_guest(uarg, &range, 1)) )
        return -EFAULT;

    return ret;
}

static int kexec_get_range_compat(XEN_GUEST_HANDLE(void) uarg)
{
#ifdef CONFIG_COMPAT
    xen_kexec_range_t range;
    compat_kexec_range_t compat_range;
    int ret = -EINVAL;

    if ( unlikely(copy_from_guest(&compat_range, uarg, 1)) )
        return -EFAULT;

    XLAT_kexec_range(&range, &compat_range);

    ret = kexec_get_range_internal(&range);

    if ( ret == 0 )
    {
        XLAT_kexec_range(&compat_range, &range);
        if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) )
            return -EFAULT;
    }

    return ret;
#else /* CONFIG_COMPAT */
    return 0;
#endif /* CONFIG_COMPAT */
}

static int kexec_load_get_bits(int type, int *base, int *bit)
{
    switch ( type )
    {
    case KEXEC_TYPE_DEFAULT:
        *base = KEXEC_IMAGE_DEFAULT_BASE;
        *bit = KEXEC_FLAG_DEFAULT_POS;
        break;
    case KEXEC_TYPE_CRASH:
        *base = KEXEC_IMAGE_CRASH_BASE;
        *bit = KEXEC_FLAG_CRASH_POS;
        break;
    default:
        return -1;
    }
    return 0;
}

void vmcoreinfo_append_str(const char *fmt, ...)
{
    va_list args;
    char buf[0x50];
    int r;
    size_t note_size = sizeof(Elf_Note) + ELFNOTE_ALIGN(strlen(VMCOREINFO_NOTE_NAME) + 1);

    if ( vmcoreinfo_size + note_size + sizeof(buf) > VMCOREINFO_BYTES )
        return;

    va_start(args, fmt);
    r = vsnprintf(buf, sizeof(buf), fmt, args);
    va_end(args);

    /* vsnprintf() returns the untruncated length: clamp to what is in buf. */
    if ( r >= (int)sizeof(buf) )
        r = sizeof(buf) - 1;

    memcpy(&vmcoreinfo_data[note_size + vmcoreinfo_size], buf, r);

    vmcoreinfo_size += r;
}
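
/*
 * Build the VMCOREINFO note once. It exports the symbols, structure sizes
 * and field offsets that post-mortem tools (e.g. makedumpfile) need in
 * order to interpret the memory image of the crashed hypervisor.
 */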
static void crash_save_vmcoreinfo(void)
{
    size_t data_size;

    if ( vmcoreinfo_size > 0 )    /* already saved */
        return;

    data_size = VMCOREINFO_BYTES - (sizeof(Elf_Note) + ELFNOTE_ALIGN(strlen(VMCOREINFO_NOTE_NAME) + 1));
    setup_note((Elf_Note *)vmcoreinfo_data, VMCOREINFO_NOTE_NAME, 0, data_size);

    VMCOREINFO_PAGESIZE(PAGE_SIZE);

    VMCOREINFO_SYMBOL(domain_list);
    VMCOREINFO_SYMBOL(frame_table);
    VMCOREINFO_SYMBOL(alloc_bitmap);
    VMCOREINFO_SYMBOL(max_page);
    VMCOREINFO_SYMBOL(xenheap_phys_end);

    VMCOREINFO_STRUCT_SIZE(page_info);
    VMCOREINFO_STRUCT_SIZE(domain);

    VMCOREINFO_OFFSET(page_info, count_info);
    VMCOREINFO_OFFSET_ALIAS(page_info, u, _domain);
    VMCOREINFO_OFFSET(domain, domain_id);
    VMCOREINFO_OFFSET(domain, next_in_list);

#ifdef ARCH_CRASH_SAVE_VMCOREINFO
    arch_crash_save_vmcoreinfo();
#endif
}
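
/*
 * Each image type owns two slots in kexec_image[]. The type's POS bit in
 * kexec_flags selects the currently active slot; a new image is loaded
 * into the inactive slot, marked present, and the POS bit is then flipped,
 * so loading never disturbs an image that a crash could be using.
 */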
static int kexec_load_unload_internal(unsigned long op, xen_kexec_load_t *load)
{
    xen_kexec_image_t *image;
    int base, bit, pos;
    int ret = 0;

    if ( kexec_load_get_bits(load->type, &base, &bit) )
        return -EINVAL;

    pos = (test_bit(bit, &kexec_flags) != 0);

    /* Load the user data into an unused image. */
    if ( op == KEXEC_CMD_kexec_load )
    {
        image = &kexec_image[base + !pos];

        BUG_ON(test_bit((base + !pos), &kexec_flags)); /* must be free */

        memcpy(image, &load->image, sizeof(*image));

        if ( !(ret = machine_kexec_load(load->type, base + !pos, image)) )
        {
            /* Set image present bit. */
            set_bit((base + !pos), &kexec_flags);

            /* Make the new image the active one. */
            change_bit(bit, &kexec_flags);
        }

        crash_save_vmcoreinfo();
    }

    /* Unload the old image, if present and the load was successful. */
    if ( ret == 0 && !test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
    {
        if ( test_and_clear_bit((base + pos), &kexec_flags) )
        {
            image = &kexec_image[base + pos];
            machine_kexec_unload(load->type, base + pos, image);
        }
    }

    return ret;
}

static int kexec_load_unload(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
{
    xen_kexec_load_t load;

    if ( unlikely(copy_from_guest(&load, uarg, 1)) )
        return -EFAULT;

    return kexec_load_unload_internal(op, &load);
}

static int kexec_load_unload_compat(unsigned long op,
                                    XEN_GUEST_HANDLE(void) uarg)
{
#ifdef CONFIG_COMPAT
    compat_kexec_load_t compat_load;
    xen_kexec_load_t load;

    if ( unlikely(copy_from_guest(&compat_load, uarg, 1)) )
        return -EFAULT;

    /*
     * This is a bit dodgy: load.image is inside load, but the automatically
     * generated XLAT_kexec_load() doesn't translate load.image correctly.
     * So translate the image explicitly, and just copy load.type, the only
     * other member, manually instead.
     *
     * XLAT_kexec_load(&load, &compat_load);
     */
    load.type = compat_load.type;
    XLAT_kexec_image(&load.image, &compat_load.image);

    return kexec_load_unload_internal(op, &load);
#else /* CONFIG_COMPAT */
    return 0;
#endif /* CONFIG_COMPAT */
}

static int kexec_exec(XEN_GUEST_HANDLE(void) uarg)
{
    xen_kexec_exec_t exec;
    xen_kexec_image_t *image;
    int base, bit, pos;

    if ( unlikely(copy_from_guest(&exec, uarg, 1)) )
        return -EFAULT;

    if ( kexec_load_get_bits(exec.type, &base, &bit) )
        return -EINVAL;

    pos = (test_bit(bit, &kexec_flags) != 0);

    /* Only allow kexec/kdump into loaded images. */
    if ( !test_bit(base + pos, &kexec_flags) )
        return -ENOENT;

    switch ( exec.type )
    {
    case KEXEC_TYPE_DEFAULT:
        image = &kexec_image[base + pos];
        one_cpu_only();
        machine_reboot_kexec(image); /* Does not return */
        break;
    case KEXEC_TYPE_CRASH:
        kexec_crash(); /* Does not return */
        break;
    }

    return -EINVAL; /* never reached */
}
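
/*
 * Common hypercall entry point for native and compat callers. Only a
 * privileged domain may use it, subject to an XSM check; load/unload are
 * serialised by kexec_lock and refused once a kexec is in progress.
 */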
int do_kexec_op_internal(unsigned long op, XEN_GUEST_HANDLE(void) uarg,
                         int compat)
{
    unsigned long flags;
    int ret = -EINVAL;

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    ret = xsm_kexec();
    if ( ret )
        return ret;

    switch ( op )
    {
    case KEXEC_CMD_kexec_get_range:
        if ( compat )
            ret = kexec_get_range_compat(uarg);
        else
            ret = kexec_get_range(uarg);
        break;
    case KEXEC_CMD_kexec_load:
    case KEXEC_CMD_kexec_unload:
        spin_lock_irqsave(&kexec_lock, flags);
        if ( !test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
        {
            if ( compat )
                ret = kexec_load_unload_compat(op, uarg);
            else
                ret = kexec_load_unload(op, uarg);
        }
        spin_unlock_irqrestore(&kexec_lock, flags);
        break;
    case KEXEC_CMD_kexec:
        ret = kexec_exec(uarg);
        break;
    }

    return ret;
}

long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
{
    return do_kexec_op_internal(op, uarg, 0);
}

#ifdef CONFIG_COMPAT
int compat_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
{
    return do_kexec_op_internal(op, uarg, 1);
}
#endif

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */