ia64/xen-unstable

changeset 12683:5d6be0099bdf

merge
author Ian Campbell <ian.campbell@xensource.com>
date Thu Nov 30 13:05:27 2006 +0000 (2006-11-30)
parents 88935ae47fa9 2a17ff9b8ffc
children b1d538e557ee
files
line diff
     1.1 --- a/buildconfigs/linux-defconfig_xen_x86_32	Thu Nov 30 10:57:28 2006 +0000
     1.2 +++ b/buildconfigs/linux-defconfig_xen_x86_32	Thu Nov 30 13:05:27 2006 +0000
     1.3 @@ -184,6 +184,7 @@ CONFIG_MTRR=y
     1.4  CONFIG_REGPARM=y
     1.5  CONFIG_SECCOMP=y
     1.6  CONFIG_HZ_100=y
     1.7 +CONFIG_KEXEC=y
     1.8  # CONFIG_HZ_250 is not set
     1.9  # CONFIG_HZ_1000 is not set
    1.10  CONFIG_HZ=100
    1.11 @@ -2776,6 +2777,7 @@ CONFIG_NTFS_FS=m
    1.12  #
    1.13  CONFIG_PROC_FS=y
    1.14  CONFIG_PROC_KCORE=y
    1.15 +# CONFIG_PROC_VMCORE is not set
    1.16  CONFIG_SYSFS=y
    1.17  CONFIG_TMPFS=y
    1.18  # CONFIG_HUGETLB_PAGE is not set
     2.1 --- a/buildconfigs/linux-defconfig_xen_x86_64	Thu Nov 30 10:57:28 2006 +0000
     2.2 +++ b/buildconfigs/linux-defconfig_xen_x86_64	Thu Nov 30 13:05:27 2006 +0000
     2.3 @@ -139,6 +139,7 @@ CONFIG_SWIOTLB=y
     2.4  CONFIG_PHYSICAL_START=0x100000
     2.5  CONFIG_SECCOMP=y
     2.6  CONFIG_HZ_100=y
     2.7 +CONFIG_KEXEC=y
     2.8  # CONFIG_HZ_250 is not set
     2.9  # CONFIG_HZ_1000 is not set
    2.10  CONFIG_HZ=100
     3.1 --- a/linux-2.6-xen-sparse/arch/i386/Kconfig	Thu Nov 30 10:57:28 2006 +0000
     3.2 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig	Thu Nov 30 13:05:27 2006 +0000
     3.3 @@ -726,7 +726,7 @@ source kernel/Kconfig.hz
     3.4  
     3.5  config KEXEC
     3.6  	bool "kexec system call (EXPERIMENTAL)"
     3.7 -	depends on EXPERIMENTAL && !X86_XEN
     3.8 +	depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST
     3.9  	help
    3.10  	  kexec is a system call that implements the ability to shutdown your
    3.11  	  current kernel, and to start another kernel.  It is like a reboot
     4.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c	Thu Nov 30 10:57:28 2006 +0000
     4.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c	Thu Nov 30 13:05:27 2006 +0000
     4.3 @@ -69,6 +69,10 @@
     4.4  #include "setup_arch_pre.h"
     4.5  #include <bios_ebda.h>
     4.6  
     4.7 +#ifdef CONFIG_XEN
     4.8 +#include <xen/interface/kexec.h>
     4.9 +#endif
    4.10 +
    4.11  /* Forward Declaration. */
    4.12  void __init find_max_pfn(void);
    4.13  
    4.14 @@ -943,6 +947,7 @@ static void __init parse_cmdline_early (
    4.15  		 * after a kernel panic.
    4.16  		 */
    4.17  		else if (!memcmp(from, "crashkernel=", 12)) {
    4.18 +#ifndef CONFIG_XEN
    4.19  			unsigned long size, base;
    4.20  			size = memparse(from+12, &from);
    4.21  			if (*from == '@') {
    4.22 @@ -953,6 +958,10 @@ static void __init parse_cmdline_early (
    4.23  				crashk_res.start = base;
    4.24  				crashk_res.end   = base + size - 1;
    4.25  			}
    4.26 +#else
    4.27 +			printk("Ignoring crashkernel command line, "
    4.28 +			       "parameter will be supplied by xen\n");
    4.29 +#endif
    4.30  		}
    4.31  #endif
    4.32  #ifdef CONFIG_PROC_VMCORE
    4.33 @@ -1322,10 +1331,14 @@ void __init setup_bootmem_allocator(void
    4.34  	}
    4.35  #endif
    4.36  #ifdef CONFIG_KEXEC
    4.37 +#ifdef CONFIG_XEN
    4.38 +	xen_machine_kexec_setup_resources();
    4.39 +#else
    4.40  	if (crashk_res.start != crashk_res.end)
    4.41  		reserve_bootmem(crashk_res.start,
    4.42  			crashk_res.end - crashk_res.start + 1);
    4.43  #endif
    4.44 +#endif
    4.45  
    4.46  	if (!xen_feature(XENFEAT_auto_translated_physmap))
    4.47  		phys_to_machine_mapping =
    4.48 @@ -1389,7 +1402,11 @@ legacy_init_iomem_resources(struct e820e
    4.49  			request_resource(res, data_resource);
    4.50  #endif
    4.51  #ifdef CONFIG_KEXEC
    4.52 -			request_resource(res, &crashk_res);
    4.53 +			if (crashk_res.start != crashk_res.end)
    4.54 +			     request_resource(res, &crashk_res);
    4.55 +#ifdef CONFIG_XEN
    4.56 +			xen_machine_kexec_register_resources(res);
    4.57 +#endif
    4.58  #endif
    4.59  		}
    4.60  	}
     5.1 --- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig	Thu Nov 30 10:57:28 2006 +0000
     5.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig	Thu Nov 30 13:05:27 2006 +0000
     5.3 @@ -435,7 +435,7 @@ config X86_MCE_AMD
     5.4  
     5.5  config KEXEC
     5.6  	bool "kexec system call (EXPERIMENTAL)"
     5.7 -	depends on EXPERIMENTAL && !X86_64_XEN
     5.8 +	depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST
     5.9  	help
    5.10  	  kexec is a system call that implements the ability to shutdown your
    5.11  	  current kernel, and to start another kernel.  It is like a reboot
     6.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c	Thu Nov 30 10:57:28 2006 +0000
     6.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c	Thu Nov 30 13:05:27 2006 +0000
     6.3 @@ -260,7 +260,11 @@ void __init e820_reserve_resources(struc
     6.4  			request_resource(res, &data_resource);
     6.5  #endif
     6.6  #ifdef CONFIG_KEXEC
     6.7 -			request_resource(res, &crashk_res);
     6.8 +			if (crashk_res.start != crashk_res.end)
     6.9 +				request_resource(res, &crashk_res);
    6.10 +#ifdef CONFIG_XEN
    6.11 +			xen_machine_kexec_register_resources(res);
    6.12 +#endif
    6.13  #endif
    6.14  		}
    6.15  	}
     7.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Thu Nov 30 10:57:28 2006 +0000
     7.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Thu Nov 30 13:05:27 2006 +0000
     7.3 @@ -80,6 +80,10 @@
     7.4  #include <asm/mach-xen/setup_arch_post.h>
     7.5  #include <xen/interface/memory.h>
     7.6  
     7.7 +#ifdef CONFIG_XEN
     7.8 +#include <xen/interface/kexec.h>
     7.9 +#endif
    7.10 +
    7.11  extern unsigned long start_pfn;
    7.12  extern struct edid_info edid_info;
    7.13  
    7.14 @@ -450,6 +454,7 @@ static __init void parse_cmdline_early (
    7.15  		 * after a kernel panic.
    7.16  		 */
    7.17  		else if (!memcmp(from, "crashkernel=", 12)) {
    7.18 +#ifndef CONFIG_XEN
    7.19  			unsigned long size, base;
    7.20  			size = memparse(from+12, &from);
    7.21  			if (*from == '@') {
    7.22 @@ -460,6 +465,10 @@ static __init void parse_cmdline_early (
    7.23  				crashk_res.start = base;
    7.24  				crashk_res.end   = base + size - 1;
    7.25  			}
    7.26 +#else
    7.27 +			printk("Ignoring crashkernel command line, "
    7.28 +			       "parameter will be supplied by xen\n");
    7.29 +#endif
    7.30  		}
    7.31  #endif
    7.32  
    7.33 @@ -812,11 +821,15 @@ void __init setup_arch(char **cmdline_p)
    7.34  #endif
    7.35  #endif	/* !CONFIG_XEN */
    7.36  #ifdef CONFIG_KEXEC
    7.37 +#ifdef CONFIG_XEN
    7.38 +	xen_machine_kexec_setup_resources();
    7.39 +#else
    7.40  	if (crashk_res.start != crashk_res.end) {
    7.41  		reserve_bootmem(crashk_res.start,
    7.42  			crashk_res.end - crashk_res.start + 1);
    7.43  	}
    7.44  #endif
    7.45 +#endif
    7.46  
    7.47  	paging_init();
    7.48  #ifdef CONFIG_X86_LOCAL_APIC
     8.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile	Thu Nov 30 10:57:28 2006 +0000
     8.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile	Thu Nov 30 13:05:27 2006 +0000
     8.3 @@ -11,3 +11,4 @@ obj-$(CONFIG_XEN_SYSFS)		+= xen_sysfs.o
     8.4  obj-$(CONFIG_XEN_SKBUFF)	+= skbuff.o
     8.5  obj-$(CONFIG_XEN_REBOOT)	+= reboot.o machine_reboot.o
     8.6  obj-$(CONFIG_XEN_SMPBOOT)	+= smpboot.o
     8.7 +obj-$(CONFIG_KEXEC)		+= machine_kexec.o
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c	Thu Nov 30 13:05:27 2006 +0000
     9.3 @@ -0,0 +1,170 @@
     9.4 +/*
     9.5 + * drivers/xen/core/machine_kexec.c 
     9.6 + * handle transition of Linux booting another kernel
     9.7 + */
     9.8 +
     9.9 +#include <linux/kexec.h>
    9.10 +#include <xen/interface/kexec.h>
    9.11 +#include <linux/mm.h>
    9.12 +#include <linux/bootmem.h>
    9.13 +#include <asm/hypercall.h>
    9.14 +
    9.15 +extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, 
    9.16 +					 struct kimage *image);
    9.17 +
    9.18 +int xen_max_nr_phys_cpus;
    9.19 +struct resource xen_hypervisor_res;
    9.20 +struct resource *xen_phys_cpus;
    9.21 +
    9.22 +void xen_machine_kexec_setup_resources(void)
    9.23 +{
    9.24 +	xen_kexec_range_t range;
    9.25 +	struct resource *res;
    9.26 +	int k = 0;
    9.27 +
    9.28 +	/* determine maximum number of physical cpus */
    9.29 +
    9.30 +	while (1) {
    9.31 +		memset(&range, 0, sizeof(range));
    9.32 +		range.range = KEXEC_RANGE_MA_CPU;
    9.33 +		range.nr = k;
    9.34 +
    9.35 +		if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
    9.36 +			break;
    9.37 +
    9.38 +		k++;
    9.39 +	}
    9.40 +
    9.41 +	xen_max_nr_phys_cpus = k;
    9.42 +
    9.43 +	/* allocate xen_phys_cpus */
    9.44 +
    9.45 +	xen_phys_cpus = alloc_bootmem_low(k * sizeof(struct resource));
    9.46 +	BUG_ON(!xen_phys_cpus);
    9.47 +
    9.48 +	/* fill in xen_phys_cpus with per-cpu crash note information */
    9.49 +
    9.50 +	for (k = 0; k < xen_max_nr_phys_cpus; k++) {
    9.51 +		memset(&range, 0, sizeof(range));
    9.52 +		range.range = KEXEC_RANGE_MA_CPU;
    9.53 +		range.nr = k;
    9.54 +
    9.55 +		BUG_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range));
    9.56 +
    9.57 +		res = xen_phys_cpus + k;
    9.58 +
    9.59 +		memset(res, 0, sizeof(*res));
    9.60 +		res->name = "Crash note";
    9.61 +		res->start = range.start;
    9.62 +		res->end = range.start + range.size - 1;
    9.63 +		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
    9.64 +	}
    9.65 +
    9.66 +	/* fill in xen_hypervisor_res with hypervisor machine address range */
    9.67 +
    9.68 +	memset(&range, 0, sizeof(range));
    9.69 +	range.range = KEXEC_RANGE_MA_XEN;
    9.70 +
    9.71 +	BUG_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range));
    9.72 +
    9.73 +	xen_hypervisor_res.name = "Hypervisor code and data";
    9.74 +	xen_hypervisor_res.start = range.start;
    9.75 +	xen_hypervisor_res.end = range.start + range.size - 1;
    9.76 +	xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
    9.77 +
    9.78 +	/* fill in crashk_res if range is reserved by hypervisor */
    9.79 +
    9.80 +	memset(&range, 0, sizeof(range));
    9.81 +	range.range = KEXEC_RANGE_MA_CRASH;
    9.82 +
    9.83 +	BUG_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range));
    9.84 +
    9.85 +	if (range.size) {
    9.86 +		crashk_res.start = range.start;
    9.87 +		crashk_res.end = range.start + range.size - 1;
    9.88 +	}
    9.89 +}
    9.90 +
    9.91 +void xen_machine_kexec_register_resources(struct resource *res)
    9.92 +{
    9.93 +	int k;
    9.94 +
    9.95 +	request_resource(res, &xen_hypervisor_res);
    9.96 +
    9.97 +	for (k = 0; k < xen_max_nr_phys_cpus; k++)
    9.98 +		request_resource(res, xen_phys_cpus + k);
    9.99 +
   9.100 +}
   9.101 +
   9.102 +static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
   9.103 +{
   9.104 +	machine_kexec_setup_load_arg(xki, image);
   9.105 +
   9.106 +	xki->indirection_page = image->head;
   9.107 +	xki->start_address = image->start;
   9.108 +}
   9.109 +
   9.110 +/*
   9.111 + * Load the image into xen so xen can kdump itself
   9.112 + * This might have been done in prepare, but prepare
   9.113 + * is currently called too early. It might make sense
   9.114 + * to move prepare, but for now, just add an extra hook.
   9.115 + */
   9.116 +int xen_machine_kexec_load(struct kimage *image)
   9.117 +{
   9.118 +	xen_kexec_load_t xkl;
   9.119 +
   9.120 +	memset(&xkl, 0, sizeof(xkl));
   9.121 +	xkl.type = image->type;
   9.122 +	setup_load_arg(&xkl.image, image);
   9.123 +	return HYPERVISOR_kexec_op(KEXEC_CMD_kexec_load, &xkl);
   9.124 +}
   9.125 +
   9.126 +/*
    9.127 + * Unload the image that was stored by xen_machine_kexec_load()
    9.128 + * This might have been done in machine_kexec_cleanup() but it
    9.129 + * is called too late, and it's possible xen could try and kdump
   9.130 + * using resources that have been freed.
   9.131 + */
   9.132 +void xen_machine_kexec_unload(struct kimage *image)
   9.133 +{
   9.134 +	xen_kexec_load_t xkl;
   9.135 +
   9.136 +	memset(&xkl, 0, sizeof(xkl));
   9.137 +	xkl.type = image->type;
   9.138 +	HYPERVISOR_kexec_op(KEXEC_CMD_kexec_unload, &xkl);
   9.139 +}
   9.140 +
   9.141 +/*
   9.142 + * Do not allocate memory (or fail in any way) in machine_kexec().
   9.143 + * We are past the point of no return, committed to rebooting now.
   9.144 + *
    9.145 + * This has the hypervisor move to the preferred reboot CPU,
    9.146 + * stop all CPUs and kexec. That is, it combines machine_shutdown()
   9.147 + * and machine_kexec() in Linux kexec terms.
   9.148 + */
   9.149 +NORET_TYPE void xen_machine_kexec(struct kimage *image)
   9.150 +{
   9.151 +	xen_kexec_exec_t xke;
   9.152 +
   9.153 +	memset(&xke, 0, sizeof(xke));
   9.154 +	xke.type = image->type;
   9.155 +	HYPERVISOR_kexec_op(KEXEC_CMD_kexec, &xke);
   9.156 +	panic("KEXEC_CMD_kexec hypercall should not return\n");
   9.157 +}
   9.158 +
   9.159 +void machine_shutdown(void)
   9.160 +{
   9.161 +	/* do nothing */
   9.162 +}
   9.163 +
   9.164 +
   9.165 +/*
   9.166 + * Local variables:
   9.167 + *  c-file-style: "linux"
   9.168 + *  indent-tabs-mode: t
   9.169 + *  c-indent-level: 8
   9.170 + *  c-basic-offset: 8
   9.171 + *  tab-width: 8
   9.172 + * End:
   9.173 + */
    10.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h	Thu Nov 30 10:57:28 2006 +0000
    10.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h	Thu Nov 30 13:05:27 2006 +0000
    10.3 @@ -395,5 +395,13 @@ HYPERVISOR_xenoprof_op(
    10.4  	return _hypercall2(int, xenoprof_op, op, arg);
    10.5  }
    10.6  
    10.7 +static inline int
    10.8 +HYPERVISOR_kexec_op(
    10.9 +	unsigned long op, void *args)
   10.10 +{
   10.11 +	return _hypercall2(int, kexec_op, op, args);
   10.12 +}
   10.13 +
   10.14 +
   10.15  
   10.16  #endif /* __HYPERCALL_H__ */
    11.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h	Thu Nov 30 10:57:28 2006 +0000
    11.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h	Thu Nov 30 13:05:27 2006 +0000
    11.3 @@ -396,4 +396,11 @@ HYPERVISOR_xenoprof_op(
    11.4  	return _hypercall2(int, xenoprof_op, op, arg);
    11.5  }
    11.6  
    11.7 +static inline int
    11.8 +HYPERVISOR_kexec_op(
    11.9 +	unsigned long op, void *args)
   11.10 +{
   11.11 +	return _hypercall2(int, kexec_op, op, args);
   11.12 +}
   11.13 +
   11.14  #endif /* __HYPERCALL_H__ */
    12.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/ptrace.h	Thu Nov 30 10:57:28 2006 +0000
    12.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/ptrace.h	Thu Nov 30 13:05:27 2006 +0000
    12.3 @@ -90,6 +90,8 @@ extern unsigned long profile_pc(struct p
    12.4  #define profile_pc(regs) instruction_pointer(regs)
    12.5  #endif
    12.6  
    12.7 +#include <linux/compiler.h>
    12.8 +
    12.9  void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
   12.10  
   12.11  struct task_struct;
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/patches/linux-2.6.16.33/git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch	Thu Nov 30 13:05:27 2006 +0000
    13.3 @@ -0,0 +1,62 @@
    13.4 +From: Eric W. Biederman <ebiederm@xmission.com>
    13.5 +Date: Sun, 30 Jul 2006 10:03:20 +0000 (-0700)
    13.6 +Subject: [PATCH] machine_kexec.c: Fix the description of segment handling
    13.7 +X-Git-Tag: v2.6.18-rc4
    13.8 +X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=2a8a3d5b65e86ec1dfef7d268c64a909eab94af7
    13.9 +
   13.10 +[PATCH] machine_kexec.c: Fix the description of segment handling
   13.11 +
   13.12 +One of my original comments in machine_kexec was unclear
   13.13 +and this should fix it.
   13.14 +
   13.15 +Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
   13.16 +Cc: Andi Kleen <ak@muc.de>
   13.17 +Acked-by: Horms <horms@verge.net.au>
   13.18 +Signed-off-by: Andrew Morton <akpm@osdl.org>
   13.19 +Signed-off-by: Linus Torvalds <torvalds@osdl.org>
   13.20 +---
   13.21 +
   13.22 +--- a/arch/i386/kernel/machine_kexec.c
   13.23 ++++ b/arch/i386/kernel/machine_kexec.c
   13.24 +@@ -189,14 +189,11 @@ NORET_TYPE void machine_kexec(struct kim
   13.25 + 	memcpy((void *)reboot_code_buffer, relocate_new_kernel,
   13.26 + 						relocate_new_kernel_size);
   13.27 + 
   13.28 +-	/* The segment registers are funny things, they are
   13.29 +-	 * automatically loaded from a table, in memory wherever you
   13.30 +-	 * set them to a specific selector, but this table is never
   13.31 +-	 * accessed again you set the segment to a different selector.
   13.32 +-	 *
   13.33 +-	 * The more common model is are caches where the behide
   13.34 +-	 * the scenes work is done, but is also dropped at arbitrary
   13.35 +-	 * times.
   13.36 ++	/* The segment registers are funny things, they have both a
   13.37 ++	 * visible and an invisible part.  Whenever the visible part is
   13.38 ++	 * set to a specific selector, the invisible part is loaded
   13.39 ++	 * with from a table in memory.  At no other time is the
   13.40 ++	 * descriptor table in memory accessed.
   13.41 + 	 *
   13.42 + 	 * I take advantage of this here by force loading the
   13.43 + 	 * segments, before I zap the gdt with an invalid value.
   13.44 +--- a/arch/x86_64/kernel/machine_kexec.c
   13.45 ++++ b/arch/x86_64/kernel/machine_kexec.c
   13.46 +@@ -207,14 +207,11 @@ NORET_TYPE void machine_kexec(struct kim
   13.47 + 	__flush_tlb();
   13.48 + 
   13.49 + 
   13.50 +-	/* The segment registers are funny things, they are
   13.51 +-	 * automatically loaded from a table, in memory wherever you
   13.52 +-	 * set them to a specific selector, but this table is never
   13.53 +-	 * accessed again unless you set the segment to a different selector.
   13.54 +-	 *
   13.55 +-	 * The more common model are caches where the behide
   13.56 +-	 * the scenes work is done, but is also dropped at arbitrary
   13.57 +-	 * times.
   13.58 ++	/* The segment registers are funny things, they have both a
   13.59 ++	 * visible and an invisible part.  Whenever the visible part is
   13.60 ++	 * set to a specific selector, the invisible part is loaded
   13.61 ++	 * with from a table in memory.  At no other time is the
   13.62 ++	 * descriptor table in memory accessed.
   13.63 + 	 *
   13.64 + 	 * I take advantage of this here by force loading the
   13.65 + 	 * segments, before I zap the gdt with an invalid value.
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/patches/linux-2.6.16.33/git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch	Thu Nov 30 13:05:27 2006 +0000
    14.3 @@ -0,0 +1,93 @@
    14.4 +From: Tobias Klauser <tklauser@nuerscht.ch>
    14.5 +Date: Mon, 26 Jun 2006 16:57:34 +0000 (+0200)
    14.6 +Subject: Storage class should be first
    14.7 +X-Git-Tag: v2.6.18-rc1
    14.8 +X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=2efe55a9cec8418f0e0cde3dc3787a42fddc4411
    14.9 +
   14.10 +Storage class should be first
   14.11 +
   14.12 +Storage class should be before const
   14.13 +
   14.14 +Signed-off-by: Tobias Klauser <tklauser@nuerscht.ch>
   14.15 +Signed-off-by: Adrian Bunk <bunk@stusta.de>
   14.16 +---
   14.17 +
   14.18 +--- a/arch/i386/kernel/machine_kexec.c
   14.19 ++++ b/arch/i386/kernel/machine_kexec.c
   14.20 +@@ -133,9 +133,9 @@ typedef asmlinkage NORET_TYPE void (*rel
   14.21 + 					unsigned long start_address,
   14.22 + 					unsigned int has_pae) ATTRIB_NORET;
   14.23 + 
   14.24 +-const extern unsigned char relocate_new_kernel[];
   14.25 ++extern const unsigned char relocate_new_kernel[];
   14.26 + extern void relocate_new_kernel_end(void);
   14.27 +-const extern unsigned int relocate_new_kernel_size;
   14.28 ++extern const unsigned int relocate_new_kernel_size;
   14.29 + 
   14.30 + /*
   14.31 +  * A architecture hook called to validate the
   14.32 +--- a/arch/powerpc/kernel/machine_kexec_32.c
   14.33 ++++ b/arch/powerpc/kernel/machine_kexec_32.c
   14.34 +@@ -30,8 +30,8 @@ typedef NORET_TYPE void (*relocate_new_k
   14.35 +  */
   14.36 + void default_machine_kexec(struct kimage *image)
   14.37 + {
   14.38 +-	const extern unsigned char relocate_new_kernel[];
   14.39 +-	const extern unsigned int relocate_new_kernel_size;
   14.40 ++	extern const unsigned char relocate_new_kernel[];
   14.41 ++	extern const unsigned int relocate_new_kernel_size;
   14.42 + 	unsigned long page_list;
   14.43 + 	unsigned long reboot_code_buffer, reboot_code_buffer_phys;
   14.44 + 	relocate_new_kernel_t rnk;
   14.45 +--- a/arch/ppc/kernel/machine_kexec.c
   14.46 ++++ b/arch/ppc/kernel/machine_kexec.c
   14.47 +@@ -25,8 +25,8 @@ typedef NORET_TYPE void (*relocate_new_k
   14.48 + 				unsigned long reboot_code_buffer,
   14.49 + 				unsigned long start_address) ATTRIB_NORET;
   14.50 + 
   14.51 +-const extern unsigned char relocate_new_kernel[];
   14.52 +-const extern unsigned int relocate_new_kernel_size;
   14.53 ++extern const unsigned char relocate_new_kernel[];
   14.54 ++extern const unsigned int relocate_new_kernel_size;
   14.55 + 
   14.56 + void machine_shutdown(void)
   14.57 + {
   14.58 +--- a/arch/s390/kernel/machine_kexec.c
   14.59 ++++ b/arch/s390/kernel/machine_kexec.c
   14.60 +@@ -27,8 +27,8 @@ static void kexec_halt_all_cpus(void *);
   14.61 + 
   14.62 + typedef void (*relocate_kernel_t) (kimage_entry_t *, unsigned long);
   14.63 + 
   14.64 +-const extern unsigned char relocate_kernel[];
   14.65 +-const extern unsigned long long relocate_kernel_len;
   14.66 ++extern const unsigned char relocate_kernel[];
   14.67 ++extern const unsigned long long relocate_kernel_len;
   14.68 + 
   14.69 + int
   14.70 + machine_kexec_prepare(struct kimage *image)
   14.71 +--- a/arch/sh/kernel/machine_kexec.c
   14.72 ++++ b/arch/sh/kernel/machine_kexec.c
   14.73 +@@ -25,8 +25,8 @@ typedef NORET_TYPE void (*relocate_new_k
   14.74 + 				unsigned long start_address,
   14.75 + 				unsigned long vbr_reg) ATTRIB_NORET;
   14.76 + 
   14.77 +-const extern unsigned char relocate_new_kernel[];
   14.78 +-const extern unsigned int relocate_new_kernel_size;
   14.79 ++extern const unsigned char relocate_new_kernel[];
   14.80 ++extern const unsigned int relocate_new_kernel_size;
   14.81 + extern void *gdb_vbr_vector;
   14.82 + 
   14.83 + /*
   14.84 +--- a/arch/x86_64/kernel/machine_kexec.c
   14.85 ++++ b/arch/x86_64/kernel/machine_kexec.c
   14.86 +@@ -149,8 +149,8 @@ typedef NORET_TYPE void (*relocate_new_k
   14.87 + 					unsigned long start_address,
   14.88 + 					unsigned long pgtable) ATTRIB_NORET;
   14.89 + 
   14.90 +-const extern unsigned char relocate_new_kernel[];
   14.91 +-const extern unsigned long relocate_new_kernel_size;
   14.92 ++extern const unsigned char relocate_new_kernel[];
   14.93 ++extern const unsigned long relocate_new_kernel_size;
   14.94 + 
   14.95 + int machine_kexec_prepare(struct kimage *image)
   14.96 + {
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/patches/linux-2.6.16.33/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch	Thu Nov 30 13:05:27 2006 +0000
    15.3 @@ -0,0 +1,401 @@
    15.4 +From: Magnus Damm <magnus@valinux.co.jp>
    15.5 +Date: Tue, 26 Sep 2006 08:52:38 +0000 (+0200)
    15.6 +Subject: [PATCH] i386: Avoid overwriting the current pgd (V4, i386)
    15.7 +X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=3566561bfadffcb5dbc85d576be80c0dbf2cccc9
    15.8 +
    15.9 +[PATCH] i386: Avoid overwriting the current pgd (V4, i386)
   15.10 +
   15.11 +kexec: Avoid overwriting the current pgd (V4, i386)
   15.12 +
   15.13 +This patch upgrades the i386-specific kexec code to avoid overwriting the
   15.14 +current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used
   15.15 +to start a secondary kernel that dumps the memory of the previous kernel.
   15.16 +
   15.17 +The code introduces a new set of page tables. These tables are used to provide
   15.18 +an executable identity mapping without overwriting the current pgd.
   15.19 +
   15.20 +Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
   15.21 +Signed-off-by: Andi Kleen <ak@suse.de>
   15.22 +---
   15.23 +
   15.24 +--- a/arch/i386/kernel/machine_kexec.c
   15.25 ++++ b/arch/i386/kernel/machine_kexec.c
   15.26 +@@ -21,70 +21,13 @@
   15.27 + #include <asm/system.h>
   15.28 + 
   15.29 + #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
   15.30 +-
   15.31 +-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
   15.32 +-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
   15.33 +-#define L2_ATTR (_PAGE_PRESENT)
   15.34 +-
   15.35 +-#define LEVEL0_SIZE (1UL << 12UL)
   15.36 +-
   15.37 +-#ifndef CONFIG_X86_PAE
   15.38 +-#define LEVEL1_SIZE (1UL << 22UL)
   15.39 +-static u32 pgtable_level1[1024] PAGE_ALIGNED;
   15.40 +-
   15.41 +-static void identity_map_page(unsigned long address)
   15.42 +-{
   15.43 +-	unsigned long level1_index, level2_index;
   15.44 +-	u32 *pgtable_level2;
   15.45 +-
   15.46 +-	/* Find the current page table */
   15.47 +-	pgtable_level2 = __va(read_cr3());
   15.48 +-
   15.49 +-	/* Find the indexes of the physical address to identity map */
   15.50 +-	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
   15.51 +-	level2_index = address / LEVEL1_SIZE;
   15.52 +-
   15.53 +-	/* Identity map the page table entry */
   15.54 +-	pgtable_level1[level1_index] = address | L0_ATTR;
   15.55 +-	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
   15.56 +-
   15.57 +-	/* Flush the tlb so the new mapping takes effect.
   15.58 +-	 * Global tlb entries are not flushed but that is not an issue.
   15.59 +-	 */
   15.60 +-	load_cr3(pgtable_level2);
   15.61 +-}
   15.62 +-
   15.63 +-#else
   15.64 +-#define LEVEL1_SIZE (1UL << 21UL)
   15.65 +-#define LEVEL2_SIZE (1UL << 30UL)
   15.66 +-static u64 pgtable_level1[512] PAGE_ALIGNED;
   15.67 +-static u64 pgtable_level2[512] PAGE_ALIGNED;
   15.68 +-
   15.69 +-static void identity_map_page(unsigned long address)
   15.70 +-{
   15.71 +-	unsigned long level1_index, level2_index, level3_index;
   15.72 +-	u64 *pgtable_level3;
   15.73 +-
   15.74 +-	/* Find the current page table */
   15.75 +-	pgtable_level3 = __va(read_cr3());
   15.76 +-
   15.77 +-	/* Find the indexes of the physical address to identity map */
   15.78 +-	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
   15.79 +-	level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
   15.80 +-	level3_index = address / LEVEL2_SIZE;
   15.81 +-
   15.82 +-	/* Identity map the page table entry */
   15.83 +-	pgtable_level1[level1_index] = address | L0_ATTR;
   15.84 +-	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
   15.85 +-	set_64bit(&pgtable_level3[level3_index],
   15.86 +-					       __pa(pgtable_level2) | L2_ATTR);
   15.87 +-
   15.88 +-	/* Flush the tlb so the new mapping takes effect.
   15.89 +-	 * Global tlb entries are not flushed but that is not an issue.
   15.90 +-	 */
   15.91 +-	load_cr3(pgtable_level3);
   15.92 +-}
   15.93 ++static u32 kexec_pgd[1024] PAGE_ALIGNED;
   15.94 ++#ifdef CONFIG_X86_PAE
   15.95 ++static u32 kexec_pmd0[1024] PAGE_ALIGNED;
   15.96 ++static u32 kexec_pmd1[1024] PAGE_ALIGNED;
   15.97 + #endif
   15.98 ++static u32 kexec_pte0[1024] PAGE_ALIGNED;
   15.99 ++static u32 kexec_pte1[1024] PAGE_ALIGNED;
  15.100 + 
  15.101 + static void set_idt(void *newidt, __u16 limit)
  15.102 + {
  15.103 +@@ -128,16 +71,6 @@ static void load_segments(void)
  15.104 + #undef __STR
  15.105 + }
  15.106 + 
  15.107 +-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
  15.108 +-					unsigned long indirection_page,
  15.109 +-					unsigned long reboot_code_buffer,
  15.110 +-					unsigned long start_address,
  15.111 +-					unsigned int has_pae) ATTRIB_NORET;
  15.112 +-
  15.113 +-extern const unsigned char relocate_new_kernel[];
  15.114 +-extern void relocate_new_kernel_end(void);
  15.115 +-extern const unsigned int relocate_new_kernel_size;
  15.116 +-
  15.117 + /*
  15.118 +  * A architecture hook called to validate the
  15.119 +  * proposed image and prepare the control pages
  15.120 +@@ -170,25 +103,29 @@ void machine_kexec_cleanup(struct kimage
  15.121 +  */
  15.122 + NORET_TYPE void machine_kexec(struct kimage *image)
  15.123 + {
  15.124 +-	unsigned long page_list;
  15.125 +-	unsigned long reboot_code_buffer;
  15.126 +-
  15.127 +-	relocate_new_kernel_t rnk;
  15.128 ++	unsigned long page_list[PAGES_NR];
  15.129 ++	void *control_page;
  15.130 + 
  15.131 + 	/* Interrupts aren't acceptable while we reboot */
  15.132 + 	local_irq_disable();
  15.133 + 
  15.134 +-	/* Compute some offsets */
  15.135 +-	reboot_code_buffer = page_to_pfn(image->control_code_page)
  15.136 +-								<< PAGE_SHIFT;
  15.137 +-	page_list = image->head;
  15.138 +-
  15.139 +-	/* Set up an identity mapping for the reboot_code_buffer */
  15.140 +-	identity_map_page(reboot_code_buffer);
  15.141 +-
  15.142 +-	/* copy it out */
  15.143 +-	memcpy((void *)reboot_code_buffer, relocate_new_kernel,
  15.144 +-						relocate_new_kernel_size);
  15.145 ++	control_page = page_address(image->control_code_page);
  15.146 ++	memcpy(control_page, relocate_kernel, PAGE_SIZE);
  15.147 ++
  15.148 ++	page_list[PA_CONTROL_PAGE] = __pa(control_page);
  15.149 ++	page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
  15.150 ++	page_list[PA_PGD] = __pa(kexec_pgd);
  15.151 ++	page_list[VA_PGD] = (unsigned long)kexec_pgd;
  15.152 ++#ifdef CONFIG_X86_PAE
  15.153 ++	page_list[PA_PMD_0] = __pa(kexec_pmd0);
  15.154 ++	page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
  15.155 ++	page_list[PA_PMD_1] = __pa(kexec_pmd1);
  15.156 ++	page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
  15.157 ++#endif
  15.158 ++	page_list[PA_PTE_0] = __pa(kexec_pte0);
  15.159 ++	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
  15.160 ++	page_list[PA_PTE_1] = __pa(kexec_pte1);
  15.161 ++	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
  15.162 + 
  15.163 + 	/* The segment registers are funny things, they have both a
  15.164 + 	 * visible and an invisible part.  Whenever the visible part is
  15.165 +@@ -207,8 +144,8 @@ NORET_TYPE void machine_kexec(struct kim
  15.166 + 	set_idt(phys_to_virt(0),0);
  15.167 + 
  15.168 + 	/* now call it */
  15.169 +-	rnk = (relocate_new_kernel_t) reboot_code_buffer;
  15.170 +-	(*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
  15.171 ++	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
  15.172 ++			image->start, cpu_has_pae);
  15.173 + }
  15.174 + 
  15.175 + /* crashkernel=size@addr specifies the location to reserve for
  15.176 +--- a/arch/i386/kernel/relocate_kernel.S
  15.177 ++++ b/arch/i386/kernel/relocate_kernel.S
  15.178 +@@ -7,16 +7,138 @@
  15.179 +  */
  15.180 + 
  15.181 + #include <linux/linkage.h>
  15.182 ++#include <asm/page.h>
  15.183 ++#include <asm/kexec.h>
  15.184 ++
  15.185 ++/*
  15.186 ++ * Must be relocatable PIC code callable as a C function
  15.187 ++ */
  15.188 ++
  15.189 ++#define PTR(x) (x << 2)
  15.190 ++#define PAGE_ALIGNED (1 << PAGE_SHIFT)
  15.191 ++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
  15.192 ++#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
  15.193 ++
  15.194 ++	.text
  15.195 ++	.align PAGE_ALIGNED
  15.196 ++	.globl relocate_kernel
  15.197 ++relocate_kernel:
  15.198 ++	movl	8(%esp), %ebp /* list of pages */
  15.199 ++
  15.200 ++#ifdef CONFIG_X86_PAE
  15.201 ++	/* map the control page at its virtual address */
  15.202 ++
  15.203 ++	movl	PTR(VA_PGD)(%ebp), %edi
  15.204 ++	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
  15.205 ++	andl	$0xc0000000, %eax
  15.206 ++	shrl	$27, %eax
  15.207 ++	addl	%edi, %eax
  15.208 ++
  15.209 ++	movl	PTR(PA_PMD_0)(%ebp), %edx
  15.210 ++	orl	$PAE_PGD_ATTR, %edx
  15.211 ++	movl	%edx, (%eax)
  15.212 ++
  15.213 ++	movl	PTR(VA_PMD_0)(%ebp), %edi
  15.214 ++	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
  15.215 ++	andl	$0x3fe00000, %eax
  15.216 ++	shrl	$18, %eax
  15.217 ++	addl	%edi, %eax
  15.218 ++
  15.219 ++	movl	PTR(PA_PTE_0)(%ebp), %edx
  15.220 ++	orl	$PAGE_ATTR, %edx
  15.221 ++	movl	%edx, (%eax)
  15.222 ++
  15.223 ++	movl	PTR(VA_PTE_0)(%ebp), %edi
  15.224 ++	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
  15.225 ++	andl	$0x001ff000, %eax
  15.226 ++	shrl	$9, %eax
  15.227 ++	addl	%edi, %eax
  15.228 ++
  15.229 ++	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
  15.230 ++	orl	$PAGE_ATTR, %edx
  15.231 ++	movl	%edx, (%eax)
  15.232 ++
  15.233 ++	/* identity map the control page at its physical address */
  15.234 ++
  15.235 ++	movl	PTR(VA_PGD)(%ebp), %edi
  15.236 ++	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
  15.237 ++	andl	$0xc0000000, %eax
  15.238 ++	shrl	$27, %eax
  15.239 ++	addl	%edi, %eax
  15.240 ++
  15.241 ++	movl	PTR(PA_PMD_1)(%ebp), %edx
  15.242 ++	orl	$PAE_PGD_ATTR, %edx
  15.243 ++	movl	%edx, (%eax)
  15.244 ++
  15.245 ++	movl	PTR(VA_PMD_1)(%ebp), %edi
  15.246 ++	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
  15.247 ++	andl	$0x3fe00000, %eax
  15.248 ++	shrl	$18, %eax
  15.249 ++	addl	%edi, %eax
  15.250 ++
  15.251 ++	movl	PTR(PA_PTE_1)(%ebp), %edx
  15.252 ++	orl	$PAGE_ATTR, %edx
  15.253 ++	movl	%edx, (%eax)
  15.254 ++
  15.255 ++	movl	PTR(VA_PTE_1)(%ebp), %edi
  15.256 ++	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
  15.257 ++	andl	$0x001ff000, %eax
  15.258 ++	shrl	$9, %eax
  15.259 ++	addl	%edi, %eax
  15.260 ++
  15.261 ++	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
  15.262 ++	orl	$PAGE_ATTR, %edx
  15.263 ++	movl	%edx, (%eax)
  15.264 ++#else
  15.265 ++	/* map the control page at its virtual address */
  15.266 ++
  15.267 ++	movl	PTR(VA_PGD)(%ebp), %edi
  15.268 ++	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
  15.269 ++	andl	$0xffc00000, %eax
  15.270 ++	shrl	$20, %eax
  15.271 ++	addl	%edi, %eax
  15.272 ++
  15.273 ++	movl	PTR(PA_PTE_0)(%ebp), %edx
  15.274 ++	orl	$PAGE_ATTR, %edx
  15.275 ++	movl	%edx, (%eax)
  15.276 ++
  15.277 ++	movl	PTR(VA_PTE_0)(%ebp), %edi
  15.278 ++	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
  15.279 ++	andl	$0x003ff000, %eax
  15.280 ++	shrl	$10, %eax
  15.281 ++	addl	%edi, %eax
  15.282 ++
  15.283 ++	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
  15.284 ++	orl	$PAGE_ATTR, %edx
  15.285 ++	movl	%edx, (%eax)
  15.286 ++
  15.287 ++	/* identity map the control page at its physical address */
  15.288 ++
  15.289 ++	movl	PTR(VA_PGD)(%ebp), %edi
  15.290 ++	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
  15.291 ++	andl	$0xffc00000, %eax
  15.292 ++	shrl	$20, %eax
  15.293 ++	addl	%edi, %eax
  15.294 ++
  15.295 ++	movl	PTR(PA_PTE_1)(%ebp), %edx
  15.296 ++	orl	$PAGE_ATTR, %edx
  15.297 ++	movl	%edx, (%eax)
  15.298 ++
  15.299 ++	movl	PTR(VA_PTE_1)(%ebp), %edi
  15.300 ++	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
  15.301 ++	andl	$0x003ff000, %eax
  15.302 ++	shrl	$10, %eax
  15.303 ++	addl	%edi, %eax
  15.304 ++
  15.305 ++	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
  15.306 ++	orl	$PAGE_ATTR, %edx
  15.307 ++	movl	%edx, (%eax)
  15.308 ++#endif
  15.309 + 
  15.310 +-	/*
  15.311 +-	 * Must be relocatable PIC code callable as a C function, that once
  15.312 +-	 * it starts can not use the previous processes stack.
  15.313 +-	 */
  15.314 +-	.globl relocate_new_kernel
  15.315 + relocate_new_kernel:
  15.316 + 	/* read the arguments and say goodbye to the stack */
  15.317 + 	movl  4(%esp), %ebx /* page_list */
  15.318 +-	movl  8(%esp), %ebp /* reboot_code_buffer */
  15.319 ++	movl  8(%esp), %ebp /* list of pages */
  15.320 + 	movl  12(%esp), %edx /* start address */
  15.321 + 	movl  16(%esp), %ecx /* cpu_has_pae */
  15.322 + 
  15.323 +@@ -24,11 +146,26 @@ relocate_new_kernel:
  15.324 + 	pushl $0
  15.325 + 	popfl
  15.326 + 
  15.327 +-	/* set a new stack at the bottom of our page... */
  15.328 +-	lea   4096(%ebp), %esp
  15.329 ++	/* get physical address of control page now */
  15.330 ++	/* this is impossible after page table switch */
  15.331 ++	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edi
  15.332 ++
  15.333 ++	/* switch to new set of page tables */
  15.334 ++	movl	PTR(PA_PGD)(%ebp), %eax
  15.335 ++	movl	%eax, %cr3
  15.336 ++
  15.337 ++	/* setup a new stack at the end of the physical control page */
  15.338 ++	lea	4096(%edi), %esp
  15.339 + 
  15.340 +-	/* store the parameters back on the stack */
  15.341 +-	pushl   %edx /* store the start address */
  15.342 ++	/* jump to identity mapped page */
  15.343 ++	movl    %edi, %eax
  15.344 ++	addl    $(identity_mapped - relocate_kernel), %eax
  15.345 ++	pushl   %eax
  15.346 ++	ret
  15.347 ++
  15.348 ++identity_mapped:
  15.349 ++	/* store the start address on the stack */
  15.350 ++	pushl   %edx
  15.351 + 
  15.352 + 	/* Set cr0 to a known state:
  15.353 + 	 * 31 0 == Paging disabled
  15.354 +@@ -113,8 +250,3 @@ relocate_new_kernel:
  15.355 + 	xorl    %edi, %edi
  15.356 + 	xorl    %ebp, %ebp
  15.357 + 	ret
  15.358 +-relocate_new_kernel_end:
  15.359 +-
  15.360 +-	.globl relocate_new_kernel_size
  15.361 +-relocate_new_kernel_size:
  15.362 +-	.long relocate_new_kernel_end - relocate_new_kernel
  15.363 +--- a/include/asm-i386/kexec.h
  15.364 ++++ b/include/asm-i386/kexec.h
  15.365 +@@ -1,6 +1,26 @@
  15.366 + #ifndef _I386_KEXEC_H
  15.367 + #define _I386_KEXEC_H
  15.368 + 
  15.369 ++#define PA_CONTROL_PAGE  0
  15.370 ++#define VA_CONTROL_PAGE  1
  15.371 ++#define PA_PGD           2
  15.372 ++#define VA_PGD           3
  15.373 ++#define PA_PTE_0         4
  15.374 ++#define VA_PTE_0         5
  15.375 ++#define PA_PTE_1         6
  15.376 ++#define VA_PTE_1         7
  15.377 ++#ifdef CONFIG_X86_PAE
  15.378 ++#define PA_PMD_0         8
  15.379 ++#define VA_PMD_0         9
  15.380 ++#define PA_PMD_1         10
  15.381 ++#define VA_PMD_1         11
  15.382 ++#define PAGES_NR         12
  15.383 ++#else
  15.384 ++#define PAGES_NR         8
  15.385 ++#endif
  15.386 ++
  15.387 ++#ifndef __ASSEMBLY__
  15.388 ++
  15.389 + #include <asm/fixmap.h>
  15.390 + #include <asm/ptrace.h>
  15.391 + #include <asm/string.h>
  15.392 +@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru
  15.393 +                newregs->eip = (unsigned long)current_text_addr();
  15.394 +        }
  15.395 + }
  15.396 ++asmlinkage NORET_TYPE void
  15.397 ++relocate_kernel(unsigned long indirection_page,
  15.398 ++		unsigned long control_page,
  15.399 ++		unsigned long start_address,
  15.400 ++		unsigned int has_pae) ATTRIB_NORET;
  15.401 ++
  15.402 ++#endif /* __ASSEMBLY__ */
  15.403 + 
  15.404 + #endif /* _I386_KEXEC_H */
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/patches/linux-2.6.16.33/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch	Thu Nov 30 13:05:27 2006 +0000
    16.3 @@ -0,0 +1,375 @@
    16.4 +From: Magnus Damm <magnus@valinux.co.jp>
    16.5 +Date: Tue, 26 Sep 2006 08:52:38 +0000 (+0200)
    16.6 +Subject: [PATCH] Avoid overwriting the current pgd (V4, x86_64)
    16.7 +X-Git-Tag: v2.6.19-rc1
    16.8 +X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f
    16.9 +
   16.10 +[PATCH] Avoid overwriting the current pgd (V4, x86_64)
   16.11 +
   16.12 +kexec: Avoid overwriting the current pgd (V4, x86_64)
   16.13 +
   16.14 +This patch upgrades the x86_64-specific kexec code to avoid overwriting the
   16.15 +current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used
   16.16 +to start a secondary kernel that dumps the memory of the previous kernel.
   16.17 +
   16.18 +The code introduces a new set of page tables. These tables are used to provide
   16.19 +an executable identity mapping without overwriting the current pgd.
   16.20 +
   16.21 +Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
   16.22 +Signed-off-by: Andi Kleen <ak@suse.de>
   16.23 +---
   16.24 +
   16.25 +--- a/arch/x86_64/kernel/machine_kexec.c
   16.26 ++++ b/arch/x86_64/kernel/machine_kexec.c
   16.27 +@@ -15,6 +15,15 @@
   16.28 + #include <asm/mmu_context.h>
   16.29 + #include <asm/io.h>
   16.30 + 
   16.31 ++#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
   16.32 ++static u64 kexec_pgd[512] PAGE_ALIGNED;
   16.33 ++static u64 kexec_pud0[512] PAGE_ALIGNED;
   16.34 ++static u64 kexec_pmd0[512] PAGE_ALIGNED;
   16.35 ++static u64 kexec_pte0[512] PAGE_ALIGNED;
   16.36 ++static u64 kexec_pud1[512] PAGE_ALIGNED;
   16.37 ++static u64 kexec_pmd1[512] PAGE_ALIGNED;
   16.38 ++static u64 kexec_pte1[512] PAGE_ALIGNED;
   16.39 ++
   16.40 + static void init_level2_page(pmd_t *level2p, unsigned long addr)
   16.41 + {
   16.42 + 	unsigned long end_addr;
   16.43 +@@ -144,32 +153,19 @@ static void load_segments(void)
   16.44 + 		);
   16.45 + }
   16.46 + 
   16.47 +-typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
   16.48 +-					unsigned long control_code_buffer,
   16.49 +-					unsigned long start_address,
   16.50 +-					unsigned long pgtable) ATTRIB_NORET;
   16.51 +-
   16.52 +-extern const unsigned char relocate_new_kernel[];
   16.53 +-extern const unsigned long relocate_new_kernel_size;
   16.54 +-
   16.55 + int machine_kexec_prepare(struct kimage *image)
   16.56 + {
   16.57 +-	unsigned long start_pgtable, control_code_buffer;
   16.58 ++	unsigned long start_pgtable;
   16.59 + 	int result;
   16.60 + 
   16.61 + 	/* Calculate the offsets */
   16.62 + 	start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
   16.63 +-	control_code_buffer = start_pgtable + PAGE_SIZE;
   16.64 + 
   16.65 + 	/* Setup the identity mapped 64bit page table */
   16.66 + 	result = init_pgtable(image, start_pgtable);
   16.67 + 	if (result)
   16.68 + 		return result;
   16.69 + 
   16.70 +-	/* Place the code in the reboot code buffer */
   16.71 +-	memcpy(__va(control_code_buffer), relocate_new_kernel,
   16.72 +-						relocate_new_kernel_size);
   16.73 +-
   16.74 + 	return 0;
   16.75 + }
   16.76 + 
   16.77 +@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage
   16.78 +  */
   16.79 + NORET_TYPE void machine_kexec(struct kimage *image)
   16.80 + {
   16.81 +-	unsigned long page_list;
   16.82 +-	unsigned long control_code_buffer;
   16.83 +-	unsigned long start_pgtable;
   16.84 +-	relocate_new_kernel_t rnk;
   16.85 ++	unsigned long page_list[PAGES_NR];
   16.86 ++	void *control_page;
   16.87 + 
   16.88 + 	/* Interrupts aren't acceptable while we reboot */
   16.89 + 	local_irq_disable();
   16.90 + 
   16.91 +-	/* Calculate the offsets */
   16.92 +-	page_list = image->head;
   16.93 +-	start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
   16.94 +-	control_code_buffer = start_pgtable + PAGE_SIZE;
   16.95 ++	control_page = page_address(image->control_code_page) + PAGE_SIZE;
   16.96 ++	memcpy(control_page, relocate_kernel, PAGE_SIZE);
   16.97 + 
   16.98 +-	/* Set the low half of the page table to my identity mapped
   16.99 +-	 * page table for kexec.  Leave the high half pointing at the
  16.100 +-	 * kernel pages.   Don't bother to flush the global pages
  16.101 +-	 * as that will happen when I fully switch to my identity mapped
  16.102 +-	 * page table anyway.
  16.103 +-	 */
  16.104 +-	memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2);
  16.105 +-	__flush_tlb();
  16.106 ++	page_list[PA_CONTROL_PAGE] = __pa(control_page);
  16.107 ++	page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
  16.108 ++	page_list[PA_PGD] = __pa(kexec_pgd);
  16.109 ++	page_list[VA_PGD] = (unsigned long)kexec_pgd;
  16.110 ++	page_list[PA_PUD_0] = __pa(kexec_pud0);
  16.111 ++	page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
  16.112 ++	page_list[PA_PMD_0] = __pa(kexec_pmd0);
  16.113 ++	page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
  16.114 ++	page_list[PA_PTE_0] = __pa(kexec_pte0);
  16.115 ++	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
  16.116 ++	page_list[PA_PUD_1] = __pa(kexec_pud1);
  16.117 ++	page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
  16.118 ++	page_list[PA_PMD_1] = __pa(kexec_pmd1);
  16.119 ++	page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
  16.120 ++	page_list[PA_PTE_1] = __pa(kexec_pte1);
  16.121 ++	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
  16.122 + 
  16.123 ++	page_list[PA_TABLE_PAGE] =
  16.124 ++	  (unsigned long)__pa(page_address(image->control_code_page));
  16.125 + 
  16.126 + 	/* The segment registers are funny things, they have both a
  16.127 + 	 * visible and an invisible part.  Whenever the visible part is
  16.128 +@@ -222,9 +224,10 @@ NORET_TYPE void machine_kexec(struct kim
  16.129 + 	 */
  16.130 + 	set_gdt(phys_to_virt(0),0);
  16.131 + 	set_idt(phys_to_virt(0),0);
  16.132 ++
  16.133 + 	/* now call it */
  16.134 +-	rnk = (relocate_new_kernel_t) control_code_buffer;
  16.135 +-	(*rnk)(page_list, control_code_buffer, image->start, start_pgtable);
  16.136 ++	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
  16.137 ++			image->start);
  16.138 + }
  16.139 + 
  16.140 + /* crashkernel=size@addr specifies the location to reserve for
  16.141 +--- a/arch/x86_64/kernel/relocate_kernel.S
  16.142 ++++ b/arch/x86_64/kernel/relocate_kernel.S
  16.143 +@@ -7,31 +7,169 @@
  16.144 +  */
  16.145 + 
  16.146 + #include <linux/linkage.h>
  16.147 ++#include <asm/page.h>
  16.148 ++#include <asm/kexec.h>
  16.149 + 
  16.150 +-	/*
  16.151 +-	 * Must be relocatable PIC code callable as a C function, that once
  16.152 +-	 * it starts can not use the previous processes stack.
  16.153 +-	 */
  16.154 +-	.globl relocate_new_kernel
  16.155 ++/*
  16.156 ++ * Must be relocatable PIC code callable as a C function
  16.157 ++ */
  16.158 ++
  16.159 ++#define PTR(x) (x << 3)
  16.160 ++#define PAGE_ALIGNED (1 << PAGE_SHIFT)
  16.161 ++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
  16.162 ++
  16.163 ++	.text
  16.164 ++	.align PAGE_ALIGNED
  16.165 + 	.code64
  16.166 ++	.globl relocate_kernel
  16.167 ++relocate_kernel:
  16.168 ++	/* %rdi indirection_page
  16.169 ++	 * %rsi page_list
  16.170 ++	 * %rdx start address
  16.171 ++	 */
  16.172 ++
  16.173 ++	/* map the control page at its virtual address */
  16.174 ++
  16.175 ++	movq	$0x0000ff8000000000, %r10        /* mask */
  16.176 ++	mov	$(39 - 3), %cl                   /* bits to shift */
  16.177 ++	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
  16.178 ++
  16.179 ++	movq	%r11, %r9
  16.180 ++	andq	%r10, %r9
  16.181 ++	shrq	%cl, %r9
  16.182 ++
  16.183 ++	movq	PTR(VA_PGD)(%rsi), %r8
  16.184 ++	addq	%r8, %r9
  16.185 ++	movq	PTR(PA_PUD_0)(%rsi), %r8
  16.186 ++	orq	$PAGE_ATTR, %r8
  16.187 ++	movq	%r8, (%r9)
  16.188 ++
  16.189 ++	shrq	$9, %r10
  16.190 ++	sub	$9, %cl
  16.191 ++
  16.192 ++	movq	%r11, %r9
  16.193 ++	andq	%r10, %r9
  16.194 ++	shrq	%cl, %r9
  16.195 ++
  16.196 ++	movq	PTR(VA_PUD_0)(%rsi), %r8
  16.197 ++	addq	%r8, %r9
  16.198 ++	movq	PTR(PA_PMD_0)(%rsi), %r8
  16.199 ++	orq	$PAGE_ATTR, %r8
  16.200 ++	movq	%r8, (%r9)
  16.201 ++
  16.202 ++	shrq	$9, %r10
  16.203 ++	sub	$9, %cl
  16.204 ++
  16.205 ++	movq	%r11, %r9
  16.206 ++	andq	%r10, %r9
  16.207 ++	shrq	%cl, %r9
  16.208 ++
  16.209 ++	movq	PTR(VA_PMD_0)(%rsi), %r8
  16.210 ++	addq	%r8, %r9
  16.211 ++	movq	PTR(PA_PTE_0)(%rsi), %r8
  16.212 ++	orq	$PAGE_ATTR, %r8
  16.213 ++	movq	%r8, (%r9)
  16.214 ++
  16.215 ++	shrq	$9, %r10
  16.216 ++	sub	$9, %cl
  16.217 ++
  16.218 ++	movq	%r11, %r9
  16.219 ++	andq	%r10, %r9
  16.220 ++	shrq	%cl, %r9
  16.221 ++
  16.222 ++	movq	PTR(VA_PTE_0)(%rsi), %r8
  16.223 ++	addq	%r8, %r9
  16.224 ++	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
  16.225 ++	orq	$PAGE_ATTR, %r8
  16.226 ++	movq	%r8, (%r9)
  16.227 ++
  16.228 ++	/* identity map the control page at its physical address */
  16.229 ++
  16.230 ++	movq	$0x0000ff8000000000, %r10        /* mask */
  16.231 ++	mov	$(39 - 3), %cl                   /* bits to shift */
  16.232 ++	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
  16.233 ++
  16.234 ++	movq	%r11, %r9
  16.235 ++	andq	%r10, %r9
  16.236 ++	shrq	%cl, %r9
  16.237 ++
  16.238 ++	movq	PTR(VA_PGD)(%rsi), %r8
  16.239 ++	addq	%r8, %r9
  16.240 ++	movq	PTR(PA_PUD_1)(%rsi), %r8
  16.241 ++	orq	$PAGE_ATTR, %r8
  16.242 ++	movq	%r8, (%r9)
  16.243 ++
  16.244 ++	shrq	$9, %r10
  16.245 ++	sub	$9, %cl
  16.246 ++
  16.247 ++	movq	%r11, %r9
  16.248 ++	andq	%r10, %r9
  16.249 ++	shrq	%cl, %r9
  16.250 ++
  16.251 ++	movq	PTR(VA_PUD_1)(%rsi), %r8
  16.252 ++	addq	%r8, %r9
  16.253 ++	movq	PTR(PA_PMD_1)(%rsi), %r8
  16.254 ++	orq	$PAGE_ATTR, %r8
  16.255 ++	movq	%r8, (%r9)
  16.256 ++
  16.257 ++	shrq	$9, %r10
  16.258 ++	sub	$9, %cl
  16.259 ++
  16.260 ++	movq	%r11, %r9
  16.261 ++	andq	%r10, %r9
  16.262 ++	shrq	%cl, %r9
  16.263 ++
  16.264 ++	movq	PTR(VA_PMD_1)(%rsi), %r8
  16.265 ++	addq	%r8, %r9
  16.266 ++	movq	PTR(PA_PTE_1)(%rsi), %r8
  16.267 ++	orq	$PAGE_ATTR, %r8
  16.268 ++	movq	%r8, (%r9)
  16.269 ++
  16.270 ++	shrq	$9, %r10
  16.271 ++	sub	$9, %cl
  16.272 ++
  16.273 ++	movq	%r11, %r9
  16.274 ++	andq	%r10, %r9
  16.275 ++	shrq	%cl, %r9
  16.276 ++
  16.277 ++	movq	PTR(VA_PTE_1)(%rsi), %r8
  16.278 ++	addq	%r8, %r9
  16.279 ++	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
  16.280 ++	orq	$PAGE_ATTR, %r8
  16.281 ++	movq	%r8, (%r9)
  16.282 ++
  16.283 + relocate_new_kernel:
  16.284 +-	/* %rdi page_list
  16.285 +-	 * %rsi reboot_code_buffer
  16.286 ++	/* %rdi indirection_page
  16.287 ++	 * %rsi page_list
  16.288 + 	 * %rdx start address
  16.289 +-	 * %rcx page_table
  16.290 +-	 * %r8  arg5
  16.291 +-	 * %r9  arg6
  16.292 + 	 */
  16.293 + 
  16.294 + 	/* zero out flags, and disable interrupts */
  16.295 + 	pushq $0
  16.296 + 	popfq
  16.297 + 
  16.298 +-	/* set a new stack at the bottom of our page... */
  16.299 +-	lea   4096(%rsi), %rsp
  16.300 ++	/* get physical address of control page now */
  16.301 ++	/* this is impossible after page table switch */
  16.302 ++	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
  16.303 ++
  16.304 ++	/* get physical address of page table now too */
  16.305 ++	movq	PTR(PA_TABLE_PAGE)(%rsi), %rcx
  16.306 ++
  16.307 ++	/* switch to new set of page tables */
  16.308 ++	movq	PTR(PA_PGD)(%rsi), %r9
  16.309 ++	movq	%r9, %cr3
  16.310 ++
  16.311 ++	/* setup a new stack at the end of the physical control page */
  16.312 ++	lea	4096(%r8), %rsp
  16.313 ++
  16.314 ++	/* jump to identity mapped page */
  16.315 ++	addq	$(identity_mapped - relocate_kernel), %r8
  16.316 ++	pushq	%r8
  16.317 ++	ret
  16.318 + 
  16.319 +-	/* store the parameters back on the stack */
  16.320 +-	pushq	%rdx /* store the start address */
  16.321 ++identity_mapped:
  16.322 ++	/* store the start address on the stack */
  16.323 ++	pushq   %rdx
  16.324 + 
  16.325 + 	/* Set cr0 to a known state:
  16.326 + 	 * 31 1 == Paging enabled
  16.327 +@@ -136,8 +274,3 @@ relocate_new_kernel:
  16.328 + 	xorq	%r15, %r15
  16.329 + 
  16.330 + 	ret
  16.331 +-relocate_new_kernel_end:
  16.332 +-
  16.333 +-	.globl relocate_new_kernel_size
  16.334 +-relocate_new_kernel_size:
  16.335 +-	.quad relocate_new_kernel_end - relocate_new_kernel
  16.336 +--- a/include/asm-x86_64/kexec.h
  16.337 ++++ b/include/asm-x86_64/kexec.h
  16.338 +@@ -1,6 +1,27 @@
  16.339 + #ifndef _X86_64_KEXEC_H
  16.340 + #define _X86_64_KEXEC_H
  16.341 + 
  16.342 ++#define PA_CONTROL_PAGE  0
  16.343 ++#define VA_CONTROL_PAGE  1
  16.344 ++#define PA_PGD           2
  16.345 ++#define VA_PGD           3
  16.346 ++#define PA_PUD_0         4
  16.347 ++#define VA_PUD_0         5
  16.348 ++#define PA_PMD_0         6
  16.349 ++#define VA_PMD_0         7
  16.350 ++#define PA_PTE_0         8
  16.351 ++#define VA_PTE_0         9
  16.352 ++#define PA_PUD_1         10
  16.353 ++#define VA_PUD_1         11
  16.354 ++#define PA_PMD_1         12
  16.355 ++#define VA_PMD_1         13
  16.356 ++#define PA_PTE_1         14
  16.357 ++#define VA_PTE_1         15
  16.358 ++#define PA_TABLE_PAGE    16
  16.359 ++#define PAGES_NR         17
  16.360 ++
  16.361 ++#ifndef __ASSEMBLY__
  16.362 ++
  16.363 + #include <linux/string.h>
  16.364 + 
  16.365 + #include <asm/page.h>
  16.366 +@@ -64,4 +85,12 @@ static inline void crash_setup_regs(stru
  16.367 + 		newregs->rip = (unsigned long)current_text_addr();
  16.368 + 	}
  16.369 + }
  16.370 ++
  16.371 ++NORET_TYPE void
  16.372 ++relocate_kernel(unsigned long indirection_page,
  16.373 ++		unsigned long page_list,
  16.374 ++		unsigned long start_address) ATTRIB_NORET;
  16.375 ++
  16.376 ++#endif /* __ASSEMBLY__ */
  16.377 ++
  16.378 + #endif /* _X86_64_KEXEC_H */
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/patches/linux-2.6.16.33/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch	Thu Nov 30 13:05:27 2006 +0000
    17.3 @@ -0,0 +1,47 @@
    17.4 +commit dbaab49f92ff6ae6255762a948375e4036cbdbd2
    17.5 +Author: Vivek Goyal <vgoyal@in.ibm.com>
    17.6 +Date:   Sat Oct 21 18:37:03 2006 +0200
    17.7 +
    17.8 +    [PATCH] x86-64: Overlapping program headers in physical addr space fix
    17.9 +    
   17.10 +    o A recent change to vmlinux.ld.S file broke kexec as now resulting vmlinux
   17.11 +      program headers are overlapping in physical address space.
   17.12 +    
   17.13 +    o Now all the vsyscall related sections are placed after data and after
   17.14 +      that mostly init data sections are placed. To avoid physical overlap
   17.15 +      among phdrs, there are three possible solutions.
   17.16 +    	- Place vsyscall sections also in data phdrs instead of user
   17.17 +    	- move vsyscall sections after init data in bss.
   17.18 +    	- create another phdr, say data.init, and move all the sections
   17.19 +    	  after vsyscall into this new phdr.
   17.20 +    
   17.21 +    o This patch implements the third solution.
   17.22 +    
   17.23 +    Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
   17.24 +    Signed-off-by: Andi Kleen <ak@suse.de>
   17.25 +    Cc: Magnus Damm <magnus@valinux.co.jp>
   17.26 +    Cc: Andi Kleen <ak@suse.de>
   17.27 +    Cc: "Eric W. Biederman" <ebiederm@xmission.com>
   17.28 +    Signed-off-by: Andrew Morton <akpm@osdl.org>
   17.29 +
   17.30 +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
   17.31 +index b9df2ab..1283614 100644
   17.32 +--- a/arch/x86_64/kernel/vmlinux.lds.S
   17.33 ++++ b/arch/x86_64/kernel/vmlinux.lds.S
   17.34 +@@ -17,6 +17,7 @@ PHDRS {
   17.35 + 	text PT_LOAD FLAGS(5);	/* R_E */
   17.36 + 	data PT_LOAD FLAGS(7);	/* RWE */
   17.37 + 	user PT_LOAD FLAGS(7);	/* RWE */
   17.38 ++	data.init PT_LOAD FLAGS(7);	/* RWE */
   17.39 + 	note PT_NOTE FLAGS(4);	/* R__ */
   17.40 + }
   17.41 + SECTIONS
   17.42 +@@ -131,7 +132,7 @@ SECTIONS
   17.43 +   . = ALIGN(8192);		/* init_task */
   17.44 +   .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
   17.45 + 	*(.data.init_task)
   17.46 +-  } :data
   17.47 ++  }:data.init
   17.48 + 
   17.49 +   . = ALIGN(4096);
   17.50 +   .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/patches/linux-2.6.16.33/kexec-generic.patch	Thu Nov 30 13:05:27 2006 +0000
    18.3 @@ -0,0 +1,228 @@
    18.4 +--- 0001/include/linux/kexec.h
    18.5 ++++ work/include/linux/kexec.h
    18.6 +@@ -31,6 +31,13 @@
    18.7 + #error KEXEC_ARCH not defined
    18.8 + #endif
    18.9 + 
   18.10 ++#ifndef KEXEC_ARCH_HAS_PAGE_MACROS
   18.11 ++#define kexec_page_to_pfn(page)  page_to_pfn(page)
   18.12 ++#define kexec_pfn_to_page(pfn)   pfn_to_page(pfn)
   18.13 ++#define kexec_virt_to_phys(addr) virt_to_phys(addr)
   18.14 ++#define kexec_phys_to_virt(addr) phys_to_virt(addr)
   18.15 ++#endif
   18.16 ++
   18.17 + /*
   18.18 +  * This structure is used to hold the arguments that are used when loading
   18.19 +  * kernel binaries.
   18.20 +@@ -91,6 +98,13 @@ struct kimage {
   18.21 + extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET;
   18.22 + extern int machine_kexec_prepare(struct kimage *image);
   18.23 + extern void machine_kexec_cleanup(struct kimage *image);
   18.24 ++#ifdef CONFIG_XEN
   18.25 ++extern int xen_machine_kexec_load(struct kimage *image);
   18.26 ++extern void xen_machine_kexec_unload(struct kimage *image);
   18.27 ++extern NORET_TYPE void xen_machine_kexec(struct kimage *image) ATTRIB_NORET;
   18.28 ++extern void xen_machine_kexec_setup_resources(void);
   18.29 ++extern void xen_machine_kexec_register_resources(struct resource *res);
   18.30 ++#endif
   18.31 + extern asmlinkage long sys_kexec_load(unsigned long entry,
   18.32 + 					unsigned long nr_segments,
   18.33 + 					struct kexec_segment __user *segments,
   18.34 +--- 0001/kernel/kexec.c
   18.35 ++++ work/kernel/kexec.c
   18.36 +@@ -403,7 +403,7 @@ static struct page *kimage_alloc_normal_
   18.37 + 		pages = kimage_alloc_pages(GFP_KERNEL, order);
   18.38 + 		if (!pages)
   18.39 + 			break;
   18.40 +-		pfn   = page_to_pfn(pages);
   18.41 ++		pfn   = kexec_page_to_pfn(pages);
   18.42 + 		epfn  = pfn + count;
   18.43 + 		addr  = pfn << PAGE_SHIFT;
   18.44 + 		eaddr = epfn << PAGE_SHIFT;
   18.45 +@@ -437,6 +437,7 @@ static struct page *kimage_alloc_normal_
   18.46 + 	return pages;
   18.47 + }
   18.48 + 
   18.49 ++#ifndef CONFIG_XEN
   18.50 + static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
   18.51 + 						      unsigned int order)
   18.52 + {
   18.53 +@@ -490,7 +491,7 @@ static struct page *kimage_alloc_crash_c
   18.54 + 		}
   18.55 + 		/* If I don't overlap any segments I have found my hole! */
   18.56 + 		if (i == image->nr_segments) {
   18.57 +-			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
   18.58 ++			pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT);
   18.59 + 			break;
   18.60 + 		}
   18.61 + 	}
   18.62 +@@ -517,6 +518,13 @@ struct page *kimage_alloc_control_pages(
   18.63 + 
   18.64 + 	return pages;
   18.65 + }
   18.66 +#else /* CONFIG_XEN */
   18.67 ++struct page *kimage_alloc_control_pages(struct kimage *image,
   18.68 ++					 unsigned int order)
   18.69 ++{
   18.70 ++	return kimage_alloc_normal_control_pages(image, order);
   18.71 ++}
   18.72 ++#endif
   18.73 + 
   18.74 + static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
   18.75 + {
   18.76 +@@ -532,7 +540,7 @@ static int kimage_add_entry(struct kimag
   18.77 + 			return -ENOMEM;
   18.78 + 
   18.79 + 		ind_page = page_address(page);
   18.80 +-		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
   18.81 ++		*image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION;
   18.82 + 		image->entry = ind_page;
   18.83 + 		image->last_entry = ind_page +
   18.84 + 				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
   18.85 +@@ -593,13 +601,13 @@ static int kimage_terminate(struct kimag
   18.86 + #define for_each_kimage_entry(image, ptr, entry) \
   18.87 + 	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
   18.88 + 		ptr = (entry & IND_INDIRECTION)? \
   18.89 +-			phys_to_virt((entry & PAGE_MASK)): ptr +1)
   18.90 ++			kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1)
   18.91 + 
   18.92 + static void kimage_free_entry(kimage_entry_t entry)
   18.93 + {
   18.94 + 	struct page *page;
   18.95 + 
   18.96 +-	page = pfn_to_page(entry >> PAGE_SHIFT);
   18.97 ++	page = kexec_pfn_to_page(entry >> PAGE_SHIFT);
   18.98 + 	kimage_free_pages(page);
   18.99 + }
  18.100 + 
  18.101 +@@ -611,6 +619,10 @@ static void kimage_free(struct kimage *i
  18.102 + 	if (!image)
  18.103 + 		return;
  18.104 + 
  18.105 ++#ifdef CONFIG_XEN
  18.106 ++	xen_machine_kexec_unload(image);
  18.107 ++#endif
  18.108 ++
  18.109 + 	kimage_free_extra_pages(image);
  18.110 + 	for_each_kimage_entry(image, ptr, entry) {
  18.111 + 		if (entry & IND_INDIRECTION) {
  18.112 +@@ -686,7 +698,7 @@ static struct page *kimage_alloc_page(st
  18.113 + 	 * have a match.
  18.114 + 	 */
  18.115 + 	list_for_each_entry(page, &image->dest_pages, lru) {
  18.116 +-		addr = page_to_pfn(page) << PAGE_SHIFT;
  18.117 ++		addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
  18.118 + 		if (addr == destination) {
  18.119 + 			list_del(&page->lru);
  18.120 + 			return page;
  18.121 +@@ -701,12 +713,12 @@ static struct page *kimage_alloc_page(st
  18.122 + 		if (!page)
  18.123 + 			return NULL;
  18.124 + 		/* If the page cannot be used file it away */
  18.125 +-		if (page_to_pfn(page) >
  18.126 ++		if (kexec_page_to_pfn(page) >
  18.127 + 				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
  18.128 + 			list_add(&page->lru, &image->unuseable_pages);
  18.129 + 			continue;
  18.130 + 		}
  18.131 +-		addr = page_to_pfn(page) << PAGE_SHIFT;
  18.132 ++		addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
  18.133 + 
  18.134 + 		/* If it is the destination page we want use it */
  18.135 + 		if (addr == destination)
  18.136 +@@ -729,7 +741,7 @@ static struct page *kimage_alloc_page(st
  18.137 + 			struct page *old_page;
  18.138 + 
  18.139 + 			old_addr = *old & PAGE_MASK;
  18.140 +-			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
  18.141 ++			old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT);
  18.142 + 			copy_highpage(page, old_page);
  18.143 + 			*old = addr | (*old & ~PAGE_MASK);
  18.144 + 
  18.145 +@@ -779,7 +791,7 @@ static int kimage_load_normal_segment(st
  18.146 + 			result  = -ENOMEM;
  18.147 + 			goto out;
  18.148 + 		}
  18.149 +-		result = kimage_add_page(image, page_to_pfn(page)
  18.150 ++		result = kimage_add_page(image, kexec_page_to_pfn(page)
  18.151 + 								<< PAGE_SHIFT);
  18.152 + 		if (result < 0)
  18.153 + 			goto out;
  18.154 +@@ -811,6 +823,7 @@ out:
  18.155 + 	return result;
  18.156 + }
  18.157 + 
  18.158 ++#ifndef CONFIG_XEN
  18.159 + static int kimage_load_crash_segment(struct kimage *image,
  18.160 + 					struct kexec_segment *segment)
  18.161 + {
  18.162 +@@ -833,7 +846,7 @@ static int kimage_load_crash_segment(str
  18.163 + 		char *ptr;
  18.164 + 		size_t uchunk, mchunk;
  18.165 + 
  18.166 +-		page = pfn_to_page(maddr >> PAGE_SHIFT);
  18.167 ++		page = kexec_pfn_to_page(maddr >> PAGE_SHIFT);
  18.168 + 		if (page == 0) {
  18.169 + 			result  = -ENOMEM;
  18.170 + 			goto out;
  18.171 +@@ -881,6 +894,13 @@ static int kimage_load_segment(struct ki
  18.172 + 
  18.173 + 	return result;
  18.174 + }
  18.175 ++#else /* CONFIG_XEN */
  18.176 ++static int kimage_load_segment(struct kimage *image,
  18.177 ++				struct kexec_segment *segment)
  18.178 ++{
  18.179 ++	return kimage_load_normal_segment(image, segment);
  18.180 ++}
  18.181 ++#endif
  18.182 + 
  18.183 + /*
  18.184 +  * Exec Kernel system call: for obvious reasons only root may call it.
  18.185 +@@ -991,6 +1011,11 @@ asmlinkage long sys_kexec_load(unsigned 
  18.186 + 		if (result)
  18.187 + 			goto out;
  18.188 + 	}
  18.189 ++#ifdef CONFIG_XEN
  18.190 ++	result = xen_machine_kexec_load(image);
  18.191 ++	if (result)
  18.192 ++		goto out;
  18.193 ++#endif
  18.194 + 	/* Install the new kernel, and  Uninstall the old */
  18.195 + 	image = xchg(dest_image, image);
  18.196 + 
  18.197 +@@ -1045,7 +1070,6 @@ void crash_kexec(struct pt_regs *regs)
  18.198 + 	struct kimage *image;
  18.199 + 	int locked;
  18.200 + 
  18.201 +-
  18.202 + 	/* Take the kexec_lock here to prevent sys_kexec_load
  18.203 + 	 * running on one cpu from replacing the crash kernel
  18.204 + 	 * we are using after a panic on a different cpu.
  18.205 +@@ -1061,7 +1085,11 @@ void crash_kexec(struct pt_regs *regs)
  18.206 + 			struct pt_regs fixed_regs;
  18.207 + 			crash_setup_regs(&fixed_regs, regs);
  18.208 + 			machine_crash_shutdown(&fixed_regs);
  18.209 ++#ifdef CONFIG_XEN
  18.210 ++			xen_machine_kexec(image);
  18.211 ++#else
  18.212 + 			machine_kexec(image);
  18.213 ++#endif
  18.214 + 		}
  18.215 + 		xchg(&kexec_lock, 0);
  18.216 + 	}
  18.217 +--- 0002/kernel/sys.c
  18.218 ++++ work/kernel/sys.c
  18.219 +@@ -435,8 +435,12 @@ void kernel_kexec(void)
  18.220 + 	kernel_restart_prepare(NULL);
  18.221 + 	printk(KERN_EMERG "Starting new kernel\n");
  18.222 + 	machine_shutdown();
  18.223 ++#ifdef CONFIG_XEN
  18.224 ++	xen_machine_kexec(image);
  18.225 ++#else
  18.226 + 	machine_kexec(image);
  18.227 + #endif
  18.228 ++#endif
  18.229 + }
  18.230 + EXPORT_SYMBOL_GPL(kernel_kexec);
  18.231 + 
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch	Thu Nov 30 13:05:27 2006 +0000
    19.3 @@ -0,0 +1,169 @@
    19.4 +kexec: Move asm segment handling code to the assembly file (i386)
    19.5 +
    19.6 +This patch moves the idt, gdt, and segment handling code from machine_kexec.c
    19.7 +to relocate_kernel.S. The main reason behind this move is to avoid code 
    19.8 +duplication in the Xen hypervisor. With this patch all code required to kexec
    19.9 +is put on the control page.
   19.10 +
   19.11 +On top of that this patch also counts as a cleanup - I think it is much
   19.12 +nicer to write assembly directly in assembly files than wrap inline assembly
   19.13 +in C functions for no apparent reason.
   19.14 +
   19.15 +Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
   19.16 +---
   19.17 +
   19.18 + Applies to 2.6.19-rc1.
   19.19 +
   19.20 + machine_kexec.c   |   59 -----------------------------------------------------
   19.21 + relocate_kernel.S |   58 +++++++++++++++++++++++++++++++++++++++++++++++-----
   19.22 + 2 files changed, 53 insertions(+), 64 deletions(-)
   19.23 +
   19.24 +--- 0002/arch/i386/kernel/machine_kexec.c
   19.25 ++++ work/arch/i386/kernel/machine_kexec.c	2006-10-05 15:49:08.000000000 +0900
   19.26 +@@ -29,48 +29,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
   19.27 + static u32 kexec_pte0[1024] PAGE_ALIGNED;
   19.28 + static u32 kexec_pte1[1024] PAGE_ALIGNED;
   19.29 + 
   19.30 +-static void set_idt(void *newidt, __u16 limit)
   19.31 +-{
   19.32 +-	struct Xgt_desc_struct curidt;
   19.33 +-
   19.34 +-	/* ia32 supports unaliged loads & stores */
   19.35 +-	curidt.size    = limit;
   19.36 +-	curidt.address = (unsigned long)newidt;
   19.37 +-
   19.38 +-	load_idt(&curidt);
   19.39 +-};
   19.40 +-
   19.41 +-
   19.42 +-static void set_gdt(void *newgdt, __u16 limit)
   19.43 +-{
   19.44 +-	struct Xgt_desc_struct curgdt;
   19.45 +-
   19.46 +-	/* ia32 supports unaligned loads & stores */
   19.47 +-	curgdt.size    = limit;
   19.48 +-	curgdt.address = (unsigned long)newgdt;
   19.49 +-
   19.50 +-	load_gdt(&curgdt);
   19.51 +-};
   19.52 +-
   19.53 +-static void load_segments(void)
   19.54 +-{
   19.55 +-#define __STR(X) #X
   19.56 +-#define STR(X) __STR(X)
   19.57 +-
   19.58 +-	__asm__ __volatile__ (
   19.59 +-		"\tljmp $"STR(__KERNEL_CS)",$1f\n"
   19.60 +-		"\t1:\n"
   19.61 +-		"\tmovl $"STR(__KERNEL_DS)",%%eax\n"
   19.62 +-		"\tmovl %%eax,%%ds\n"
   19.63 +-		"\tmovl %%eax,%%es\n"
   19.64 +-		"\tmovl %%eax,%%fs\n"
   19.65 +-		"\tmovl %%eax,%%gs\n"
   19.66 +-		"\tmovl %%eax,%%ss\n"
   19.67 +-		::: "eax", "memory");
   19.68 +-#undef STR
   19.69 +-#undef __STR
   19.70 +-}
   19.71 +-
   19.72 + /*
   19.73 +  * A architecture hook called to validate the
   19.74 +  * proposed image and prepare the control pages
   19.75 +@@ -127,23 +85,6 @@ NORET_TYPE void machine_kexec(struct kim
   19.76 + 	page_list[PA_PTE_1] = __pa(kexec_pte1);
   19.77 + 	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
   19.78 + 
   19.79 +-	/* The segment registers are funny things, they have both a
   19.80 +-	 * visible and an invisible part.  Whenever the visible part is
   19.81 +-	 * set to a specific selector, the invisible part is loaded
   19.82 +-	 * with from a table in memory.  At no other time is the
   19.83 +-	 * descriptor table in memory accessed.
   19.84 +-	 *
   19.85 +-	 * I take advantage of this here by force loading the
   19.86 +-	 * segments, before I zap the gdt with an invalid value.
   19.87 +-	 */
   19.88 +-	load_segments();
   19.89 +-	/* The gdt & idt are now invalid.
   19.90 +-	 * If you want to load them you must set up your own idt & gdt.
   19.91 +-	 */
   19.92 +-	set_gdt(phys_to_virt(0),0);
   19.93 +-	set_idt(phys_to_virt(0),0);
   19.94 +-
   19.95 +-	/* now call it */
   19.96 + 	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
   19.97 + 			image->start, cpu_has_pae);
   19.98 + }
   19.99 +--- 0002/arch/i386/kernel/relocate_kernel.S
  19.100 ++++ work/arch/i386/kernel/relocate_kernel.S	2006-10-05 16:03:21.000000000 +0900
  19.101 +@@ -154,14 +154,45 @@ relocate_new_kernel:
  19.102 + 	movl	PTR(PA_PGD)(%ebp), %eax
  19.103 + 	movl	%eax, %cr3
  19.104 + 
  19.105 ++	/* setup idt */
  19.106 ++	movl	%edi, %eax
  19.107 ++	addl	$(idt_48 - relocate_kernel), %eax
  19.108 ++	lidtl	(%eax)
  19.109 ++
  19.110 ++	/* setup gdt */
  19.111 ++	movl	%edi, %eax
  19.112 ++	addl	$(gdt - relocate_kernel), %eax
  19.113 ++	movl	%edi, %esi
  19.114 ++	addl	$((gdt_48 - relocate_kernel) + 2), %esi
  19.115 ++	movl	%eax, (%esi)
  19.116 ++	
  19.117 ++	movl	%edi, %eax
  19.118 ++	addl	$(gdt_48 - relocate_kernel), %eax
  19.119 ++	lgdtl	(%eax)
  19.120 ++
  19.121 ++	/* setup data segment registers */
  19.122 ++	mov	$(gdt_ds - gdt), %eax
  19.123 ++	mov	%eax, %ds
  19.124 ++	mov	%eax, %es
  19.125 ++	mov	%eax, %fs
  19.126 ++	mov	%eax, %gs
  19.127 ++	mov	%eax, %ss
  19.128 ++	
  19.129 + 	/* setup a new stack at the end of the physical control page */
  19.130 + 	lea	4096(%edi), %esp
  19.131 + 
  19.132 +-	/* jump to identity mapped page */
  19.133 +-	movl    %edi, %eax
  19.134 +-	addl    $(identity_mapped - relocate_kernel), %eax
  19.135 +-	pushl   %eax
  19.136 +-	ret
  19.137 ++	/* load new code segment and jump to identity mapped page */
  19.138 ++	movl	%edi, %esi
  19.139 ++	xorl	%eax, %eax
  19.140 ++	pushl	%eax
  19.141 ++	pushl	%esi
  19.142 ++	pushl	%eax
  19.143 ++	movl	$(gdt_cs - gdt), %eax
  19.144 ++	pushl	%eax	
  19.145 ++	movl	%edi, %eax
  19.146 ++	addl	$(identity_mapped - relocate_kernel),%eax
  19.147 ++	pushl	%eax
  19.148 ++	iretl
  19.149 + 
  19.150 + identity_mapped:
  19.151 + 	/* store the start address on the stack */
  19.152 +@@ -250,3 +281,20 @@ identity_mapped:
  19.153 + 	xorl    %edi, %edi
  19.154 + 	xorl    %ebp, %ebp
  19.155 + 	ret
  19.156 ++
  19.157 ++	.align	16
  19.158 ++gdt:
  19.159 ++	.quad	0x0000000000000000	/* NULL descriptor */
  19.160 ++gdt_cs:	
  19.161 ++	.quad	0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
  19.162 ++gdt_ds:
  19.163 ++	.quad	0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */
  19.164 ++gdt_end:
  19.165 ++	
  19.166 ++gdt_48:
  19.167 ++	.word	gdt_end - gdt - 1	/* limit */
  19.168 ++	.long	0			/* base - filled in by code above */
  19.169 ++
  19.170 ++idt_48:
  19.171 ++	.word	0			/* limit */
  19.172 ++	.long	0			/* base */
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch	Thu Nov 30 13:05:27 2006 +0000
    20.3 @@ -0,0 +1,161 @@
    20.4 +kexec: Move asm segment handling code to the assembly file (x86_64)
    20.5 +
    20.6 +This patch moves the idt, gdt, and segment handling code from machine_kexec.c
    20.7 +to relocate_kernel.S.  The main reason behind this move is to avoid code 
    20.8 +duplication in the Xen hypervisor. With this patch all code required to kexec
    20.9 +is put on the control page.
   20.10 +
   20.11 +On top of that this patch also counts as a cleanup - I think it is much
   20.12 +nicer to write assembly directly in assembly files than wrap inline assembly
   20.13 +in C functions for no apparent reason.
   20.14 +
   20.15 +Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
   20.16 +---
   20.17 +
   20.18 + Applies to 2.6.19-rc1.
   20.19 +
   20.20 + machine_kexec.c   |   58 -----------------------------------------------------
   20.21 + relocate_kernel.S |   50 +++++++++++++++++++++++++++++++++++++++++----
   20.22 + 2 files changed, 45 insertions(+), 63 deletions(-)
   20.23 +
   20.24 +--- 0002/arch/x86_64/kernel/machine_kexec.c
   20.25 ++++ work/arch/x86_64/kernel/machine_kexec.c	2006-10-05 16:15:49.000000000 +0900
   20.26 +@@ -112,47 +112,6 @@ static int init_pgtable(struct kimage *i
   20.27 +  	return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
   20.28 + }
   20.29 + 
   20.30 +-static void set_idt(void *newidt, u16 limit)
   20.31 +-{
   20.32 +-	struct desc_ptr curidt;
   20.33 +-
   20.34 +-	/* x86-64 supports unaliged loads & stores */
   20.35 +-	curidt.size    = limit;
   20.36 +-	curidt.address = (unsigned long)newidt;
   20.37 +-
   20.38 +-	__asm__ __volatile__ (
   20.39 +-		"lidtq %0\n"
   20.40 +-		: : "m" (curidt)
   20.41 +-		);
   20.42 +-};
   20.43 +-
   20.44 +-
   20.45 +-static void set_gdt(void *newgdt, u16 limit)
   20.46 +-{
   20.47 +-	struct desc_ptr curgdt;
   20.48 +-
   20.49 +-	/* x86-64 supports unaligned loads & stores */
   20.50 +-	curgdt.size    = limit;
   20.51 +-	curgdt.address = (unsigned long)newgdt;
   20.52 +-
   20.53 +-	__asm__ __volatile__ (
   20.54 +-		"lgdtq %0\n"
   20.55 +-		: : "m" (curgdt)
   20.56 +-		);
   20.57 +-};
   20.58 +-
   20.59 +-static void load_segments(void)
   20.60 +-{
   20.61 +-	__asm__ __volatile__ (
   20.62 +-		"\tmovl %0,%%ds\n"
   20.63 +-		"\tmovl %0,%%es\n"
   20.64 +-		"\tmovl %0,%%ss\n"
   20.65 +-		"\tmovl %0,%%fs\n"
   20.66 +-		"\tmovl %0,%%gs\n"
   20.67 +-		: : "a" (__KERNEL_DS) : "memory"
   20.68 +-		);
   20.69 +-}
   20.70 +-
   20.71 + int machine_kexec_prepare(struct kimage *image)
   20.72 + {
   20.73 + 	unsigned long start_pgtable;
   20.74 +@@ -209,23 +168,6 @@ NORET_TYPE void machine_kexec(struct kim
   20.75 + 	page_list[PA_TABLE_PAGE] =
   20.76 + 	  (unsigned long)__pa(page_address(image->control_code_page));
   20.77 + 
   20.78 +-	/* The segment registers are funny things, they have both a
   20.79 +-	 * visible and an invisible part.  Whenever the visible part is
   20.80 +-	 * set to a specific selector, the invisible part is loaded
   20.81 +-	 * with from a table in memory.  At no other time is the
   20.82 +-	 * descriptor table in memory accessed.
   20.83 +-	 *
   20.84 +-	 * I take advantage of this here by force loading the
   20.85 +-	 * segments, before I zap the gdt with an invalid value.
   20.86 +-	 */
   20.87 +-	load_segments();
   20.88 +-	/* The gdt & idt are now invalid.
   20.89 +-	 * If you want to load them you must set up your own idt & gdt.
   20.90 +-	 */
   20.91 +-	set_gdt(phys_to_virt(0),0);
   20.92 +-	set_idt(phys_to_virt(0),0);
   20.93 +-
   20.94 +-	/* now call it */
   20.95 + 	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
   20.96 + 			image->start);
   20.97 + }
   20.98 +--- 0002/arch/x86_64/kernel/relocate_kernel.S
   20.99 ++++ work/arch/x86_64/kernel/relocate_kernel.S	2006-10-05 16:18:07.000000000 +0900
  20.100 +@@ -159,13 +159,39 @@ relocate_new_kernel:
  20.101 + 	movq	PTR(PA_PGD)(%rsi), %r9
  20.102 + 	movq	%r9, %cr3
  20.103 + 
  20.104 ++	/* setup idt */
  20.105 ++	movq    %r8, %rax
  20.106 ++	addq    $(idt_80 - relocate_kernel), %rax
  20.107 ++	lidtq   (%rax)
  20.108 ++
  20.109 ++	/* setup gdt */
  20.110 ++	movq    %r8, %rax
  20.111 ++	addq    $(gdt - relocate_kernel), %rax
  20.112 ++	movq    %r8, %r9
  20.113 ++	addq    $((gdt_80 - relocate_kernel) + 2), %r9
  20.114 ++	movq    %rax, (%r9)
  20.115 ++
  20.116 ++	movq    %r8, %rax
  20.117 ++	addq    $(gdt_80 - relocate_kernel), %rax
  20.118 ++	lgdtq   (%rax)
  20.119 ++
  20.120 ++	/* setup data segment registers */
  20.121 ++	xorl	%eax, %eax
  20.122 ++	movl    %eax, %ds
  20.123 ++	movl    %eax, %es
  20.124 ++	movl    %eax, %fs
  20.125 ++	movl    %eax, %gs
  20.126 ++	movl    %eax, %ss
  20.127 ++	
  20.128 + 	/* setup a new stack at the end of the physical control page */
  20.129 + 	lea	4096(%r8), %rsp
  20.130 + 
  20.131 +-	/* jump to identity mapped page */
  20.132 +-	addq	$(identity_mapped - relocate_kernel), %r8
  20.133 +-	pushq	%r8
  20.134 +-	ret
  20.135 ++	/* load new code segment and jump to identity mapped page */
  20.136 ++	movq	%r8, %rax
  20.137 ++	addq    $(identity_mapped - relocate_kernel), %rax
  20.138 ++	pushq	$(gdt_cs - gdt)
  20.139 ++	pushq	%rax
  20.140 ++	lretq
  20.141 + 
  20.142 + identity_mapped:
  20.143 + 	/* store the start address on the stack */
  20.144 +@@ -272,5 +298,19 @@ identity_mapped:
  20.145 + 	xorq	%r13, %r13
  20.146 + 	xorq	%r14, %r14
  20.147 + 	xorq	%r15, %r15
  20.148 +-
  20.149 + 	ret
  20.150 ++
  20.151 ++	.align  16
  20.152 ++gdt:
  20.153 ++	.quad	0x0000000000000000	/* NULL descriptor */
  20.154 ++gdt_cs:
  20.155 ++	.quad   0x00af9a000000ffff
  20.156 ++gdt_end:
  20.157 ++
  20.158 ++gdt_80:
  20.159 ++	.word	gdt_end - gdt - 1	/* limit */
  20.160 ++	.quad	0			/* base - filled in by code above */
  20.161 ++
  20.162 ++idt_80:
  20.163 ++	.word	0			/* limit */
  20.164 ++	.quad	0			/* base */
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-i386.patch	Thu Nov 30 13:05:27 2006 +0000
    21.3 @@ -0,0 +1,108 @@
    21.4 +--- 0001/arch/i386/kernel/crash.c
    21.5 ++++ work/arch/i386/kernel/crash.c
    21.6 +@@ -90,6 +90,7 @@ static void crash_save_self(struct pt_re
    21.7 + 	crash_save_this_cpu(regs, cpu);
    21.8 + }
    21.9 + 
   21.10 ++#ifndef CONFIG_XEN
   21.11 + #ifdef CONFIG_SMP
   21.12 + static atomic_t waiting_for_crash_ipi;
   21.13 + 
   21.14 +@@ -158,6 +159,7 @@ static void nmi_shootdown_cpus(void)
   21.15 + 	/* There are no cpus to shootdown */
   21.16 + }
   21.17 + #endif
   21.18 ++#endif /* CONFIG_XEN */
   21.19 + 
   21.20 + void machine_crash_shutdown(struct pt_regs *regs)
   21.21 + {
   21.22 +@@ -174,10 +176,12 @@ void machine_crash_shutdown(struct pt_re
   21.23 + 
   21.24 + 	/* Make a note of crashing cpu. Will be used in NMI callback.*/
   21.25 + 	crashing_cpu = smp_processor_id();
   21.26 ++#ifndef CONFIG_XEN
   21.27 + 	nmi_shootdown_cpus();
   21.28 + 	lapic_shutdown();
   21.29 + #if defined(CONFIG_X86_IO_APIC)
   21.30 + 	disable_IO_APIC();
   21.31 + #endif
   21.32 ++#endif /* CONFIG_XEN */
   21.33 + 	crash_save_self(regs);
   21.34 + }
   21.35 +--- 0007/arch/i386/kernel/machine_kexec.c
   21.36 ++++ work/arch/i386/kernel/machine_kexec.c
   21.37 +@@ -19,6 +19,10 @@
   21.38 + #include <asm/desc.h>
   21.39 + #include <asm/system.h>
   21.40 + 
   21.41 ++#ifdef CONFIG_XEN
   21.42 ++#include <xen/interface/kexec.h>
   21.43 ++#endif
   21.44 ++
   21.45 + #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
   21.46 + static u32 kexec_pgd[1024] PAGE_ALIGNED;
   21.47 + #ifdef CONFIG_X86_PAE
   21.48 +@@ -28,6 +32,40 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
   21.49 + static u32 kexec_pte0[1024] PAGE_ALIGNED;
   21.50 + static u32 kexec_pte1[1024] PAGE_ALIGNED;
   21.51 + 
   21.52 ++#ifdef CONFIG_XEN
   21.53 ++
   21.54 ++#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT)
   21.55 ++
   21.56 ++#if PAGES_NR > KEXEC_XEN_NO_PAGES
   21.57 ++#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break
   21.58 ++#endif
   21.59 ++
   21.60 ++#if PA_CONTROL_PAGE != 0
   21.61 ++#error PA_CONTROL_PAGE is non zero - Xen support will break
   21.62 ++#endif
   21.63 ++
   21.64 ++void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
   21.65 ++{
   21.66 ++	void *control_page;
   21.67 ++
   21.68 ++	memset(xki->page_list, 0, sizeof(xki->page_list));
   21.69 ++
   21.70 ++	control_page = page_address(image->control_code_page);
   21.71 ++	memcpy(control_page, relocate_kernel, PAGE_SIZE);
   21.72 ++
   21.73 ++	xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
   21.74 ++	xki->page_list[PA_PGD] = __ma(kexec_pgd);
   21.75 ++#ifdef CONFIG_X86_PAE
   21.76 ++	xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
   21.77 ++	xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
   21.78 ++#endif
   21.79 ++	xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
   21.80 ++	xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
   21.81 ++
   21.82 ++}
   21.83 ++
   21.84 ++#endif /* CONFIG_XEN */
   21.85 ++
   21.86 + /*
   21.87 +  * A architecture hook called to validate the
   21.88 +  * proposed image and prepare the control pages
   21.89 +--- 0006/include/asm-i386/kexec.h
   21.90 ++++ work/include/asm-i386/kexec.h
   21.91 +@@ -98,6 +98,20 @@ relocate_kernel(unsigned long indirectio
   21.92 + 		unsigned long start_address,
   21.93 + 		unsigned int has_pae) ATTRIB_NORET;
   21.94 + 
   21.95 ++
   21.96 ++/* Under Xen we need to work with machine addresses. These macros give the
   21.97 ++ * machine address of a certain page to the generic kexec code instead of
   21.98 ++ * the pseudo-physical address which would be given by the default macros.
   21.99 ++ */
  21.100 ++
  21.101 ++#ifdef CONFIG_XEN
  21.102 ++#define KEXEC_ARCH_HAS_PAGE_MACROS
  21.103 ++#define kexec_page_to_pfn(page)  pfn_to_mfn(page_to_pfn(page))
  21.104 ++#define kexec_pfn_to_page(pfn)   pfn_to_page(mfn_to_pfn(pfn))
  21.105 ++#define kexec_virt_to_phys(addr) virt_to_machine(addr)
  21.106 ++#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr))
  21.107 ++#endif
  21.108 ++
  21.109 + #endif /* __ASSEMBLY__ */
  21.110 + 
  21.111 + #endif /* _I386_KEXEC_H */
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-x86_64.patch	Thu Nov 30 13:05:27 2006 +0000
    22.3 @@ -0,0 +1,219 @@
    22.4 +--- 0001/arch/x86_64/kernel/crash.c
    22.5 ++++ work/arch/x86_64/kernel/crash.c
    22.6 +@@ -92,6 +92,7 @@ static void crash_save_self(struct pt_re
    22.7 + 	crash_save_this_cpu(regs, cpu);
    22.8 + }
    22.9 + 
   22.10 ++#ifndef CONFIG_XEN
   22.11 + #ifdef CONFIG_SMP
   22.12 + static atomic_t waiting_for_crash_ipi;
   22.13 + 
   22.14 +@@ -156,6 +157,7 @@ static void nmi_shootdown_cpus(void)
   22.15 + 	/* There are no cpus to shootdown */
   22.16 + }
   22.17 + #endif
   22.18 ++#endif /* CONFIG_XEN */
   22.19 + 
   22.20 + void machine_crash_shutdown(struct pt_regs *regs)
   22.21 + {
   22.22 +@@ -173,6 +175,8 @@ void machine_crash_shutdown(struct pt_re
   22.23 + 
   22.24 + 	/* Make a note of crashing cpu. Will be used in NMI callback.*/
   22.25 + 	crashing_cpu = smp_processor_id();
   22.26 ++
   22.27 ++#ifndef CONFIG_XEN
   22.28 + 	nmi_shootdown_cpus();
   22.29 + 
   22.30 + 	if(cpu_has_apic)
   22.31 +@@ -181,6 +185,6 @@ void machine_crash_shutdown(struct pt_re
   22.32 + #if defined(CONFIG_X86_IO_APIC)
   22.33 + 	disable_IO_APIC();
   22.34 + #endif
   22.35 +-
   22.36 ++#endif /* CONFIG_XEN */
   22.37 + 	crash_save_self(regs);
   22.38 + }
   22.39 +--- 0010/arch/x86_64/kernel/machine_kexec.c
   22.40 ++++ work/arch/x86_64/kernel/machine_kexec.c
   22.41 +@@ -24,6 +24,104 @@ static u64 kexec_pud1[512] PAGE_ALIGNED;
   22.42 + static u64 kexec_pmd1[512] PAGE_ALIGNED;
   22.43 + static u64 kexec_pte1[512] PAGE_ALIGNED;
   22.44 + 
   22.45 ++#ifdef CONFIG_XEN
   22.46 ++
   22.47 ++/* In the case of Xen, use local page table helpers that write entries
   22.48 ++ * directly, so that a regular identity mapping page table can be created.
   22.49 ++ */
   22.50 ++
   22.51 ++#include <xen/interface/kexec.h>
   22.52 ++#include <xen/interface/memory.h>
   22.53 ++
   22.54 ++#define x__pmd(x) ((pmd_t) { (x) } )
   22.55 ++#define x__pud(x) ((pud_t) { (x) } )
   22.56 ++#define x__pgd(x) ((pgd_t) { (x) } )
   22.57 ++
   22.58 ++#define x_pmd_val(x)   ((x).pmd)
   22.59 ++#define x_pud_val(x)   ((x).pud)
   22.60 ++#define x_pgd_val(x)   ((x).pgd)
   22.61 ++
   22.62 ++static inline void x_set_pmd(pmd_t *dst, pmd_t val)
   22.63 ++{
   22.64 ++	x_pmd_val(*dst) = x_pmd_val(val);
   22.65 ++}
   22.66 ++
   22.67 ++static inline void x_set_pud(pud_t *dst, pud_t val)
   22.68 ++{
   22.69 ++	x_pud_val(*dst) = phys_to_machine(x_pud_val(val));
   22.70 ++}
   22.71 ++
   22.72 ++static inline void x_pud_clear (pud_t *pud)
   22.73 ++{
   22.74 ++	x_pud_val(*pud) = 0;
   22.75 ++}
   22.76 ++
   22.77 ++static inline void x_set_pgd(pgd_t *dst, pgd_t val)
   22.78 ++{
   22.79 ++	x_pgd_val(*dst) = phys_to_machine(x_pgd_val(val));
   22.80 ++}
   22.81 ++
   22.82 ++static inline void x_pgd_clear (pgd_t * pgd)
   22.83 ++{
   22.84 ++	x_pgd_val(*pgd) = 0;
   22.85 ++}
   22.86 ++
   22.87 ++#define X__PAGE_KERNEL_LARGE_EXEC \
   22.88 ++        (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_PSE)
   22.89 ++#define X_KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
   22.90 ++
   22.91 ++#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT)
   22.92 ++
   22.93 ++#if PAGES_NR > KEXEC_XEN_NO_PAGES
   22.94 ++#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break
   22.95 ++#endif
   22.96 ++
   22.97 ++#if PA_CONTROL_PAGE != 0
   22.98 ++#error PA_CONTROL_PAGE is non zero - Xen support will break
   22.99 ++#endif
  22.100 ++
  22.101 ++void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
  22.102 ++{
  22.103 ++	void *control_page;
  22.104 ++	void *table_page;
  22.105 ++
  22.106 ++	memset(xki->page_list, 0, sizeof(xki->page_list));
  22.107 ++
  22.108 ++	control_page = page_address(image->control_code_page) + PAGE_SIZE;
  22.109 ++	memcpy(control_page, relocate_kernel, PAGE_SIZE);
  22.110 ++
  22.111 ++	table_page = page_address(image->control_code_page);
  22.112 ++
  22.113 ++	xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
  22.114 ++	xki->page_list[PA_TABLE_PAGE] = __ma(table_page);
  22.115 ++
  22.116 ++	xki->page_list[PA_PGD] = __ma(kexec_pgd);
  22.117 ++	xki->page_list[PA_PUD_0] = __ma(kexec_pud0);
  22.118 ++	xki->page_list[PA_PUD_1] = __ma(kexec_pud1);
  22.119 ++	xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
  22.120 ++	xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
  22.121 ++	xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
  22.122 ++	xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
  22.123 ++}
  22.124 ++
  22.125 ++#else /* CONFIG_XEN */
  22.126 ++
  22.127 ++#define x__pmd(x) __pmd(x)
  22.128 ++#define x__pud(x) __pud(x)
  22.129 ++#define x__pgd(x) __pgd(x)
  22.130 ++
  22.131 ++#define x_set_pmd(x, y) set_pmd(x, y)
  22.132 ++#define x_set_pud(x, y) set_pud(x, y)
  22.133 ++#define x_set_pgd(x, y) set_pgd(x, y)
  22.134 ++
  22.135 ++#define x_pud_clear(x) pud_clear(x)
  22.136 ++#define x_pgd_clear(x) pgd_clear(x)
  22.137 ++
  22.138 ++#define X__PAGE_KERNEL_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
  22.139 ++#define X_KERNPG_TABLE _KERNPG_TABLE
  22.140 ++
  22.141 ++#endif /* CONFIG_XEN */
  22.142 ++
  22.143 + static void init_level2_page(pmd_t *level2p, unsigned long addr)
  22.144 + {
  22.145 + 	unsigned long end_addr;
  22.146 +@@ -31,7 +129,7 @@ static void init_level2_page(pmd_t *leve
  22.147 + 	addr &= PAGE_MASK;
  22.148 + 	end_addr = addr + PUD_SIZE;
  22.149 + 	while (addr < end_addr) {
  22.150 +-		set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
  22.151 ++		x_set_pmd(level2p++, x__pmd(addr | X__PAGE_KERNEL_LARGE_EXEC));
  22.152 + 		addr += PMD_SIZE;
  22.153 + 	}
  22.154 + }
  22.155 +@@ -56,12 +154,12 @@ static int init_level3_page(struct kimag
  22.156 + 		}
  22.157 + 		level2p = (pmd_t *)page_address(page);
  22.158 + 		init_level2_page(level2p, addr);
  22.159 +-		set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
  22.160 ++		x_set_pud(level3p++, x__pud(__pa(level2p) | X_KERNPG_TABLE));
  22.161 + 		addr += PUD_SIZE;
  22.162 + 	}
  22.163 + 	/* clear the unused entries */
  22.164 + 	while (addr < end_addr) {
  22.165 +-		pud_clear(level3p++);
  22.166 ++		x_pud_clear(level3p++);
  22.167 + 		addr += PUD_SIZE;
  22.168 + 	}
  22.169 + out:
  22.170 +@@ -92,12 +190,12 @@ static int init_level4_page(struct kimag
  22.171 + 		if (result) {
  22.172 + 			goto out;
  22.173 + 		}
  22.174 +-		set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
  22.175 ++		x_set_pgd(level4p++, x__pgd(__pa(level3p) | X_KERNPG_TABLE));
  22.176 + 		addr += PGDIR_SIZE;
  22.177 + 	}
  22.178 + 	/* clear the unused entries */
  22.179 + 	while (addr < end_addr) {
  22.180 +-		pgd_clear(level4p++);
  22.181 ++		x_pgd_clear(level4p++);
  22.182 + 		addr += PGDIR_SIZE;
  22.183 + 	}
  22.184 + out:
  22.185 +@@ -108,8 +206,14 @@ out:
  22.186 + static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
  22.187 + {
  22.188 + 	pgd_t *level4p;
  22.189 ++	unsigned long x_end_pfn = end_pfn;
  22.190 ++
  22.191 ++#ifdef CONFIG_XEN
  22.192 ++	x_end_pfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
  22.193 ++#endif
  22.194 ++
  22.195 + 	level4p = (pgd_t *)__va(start_pgtable);
  22.196 +- 	return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
  22.197 ++ 	return init_level4_page(image, level4p, 0, x_end_pfn << PAGE_SHIFT);
  22.198 + }
  22.199 + 
  22.200 + int machine_kexec_prepare(struct kimage *image)
  22.201 +--- 0009/include/asm-x86_64/kexec.h
  22.202 ++++ work/include/asm-x86_64/kexec.h
  22.203 +@@ -91,6 +91,19 @@ relocate_kernel(unsigned long indirectio
  22.204 + 		unsigned long page_list,
  22.205 + 		unsigned long start_address) ATTRIB_NORET;
  22.206 + 
  22.207 ++/* Under Xen we need to work with machine addresses. These macros give the
  22.208 ++ * machine address of a certain page to the generic kexec code instead of
  22.209 ++ * the pseudo-physical address which would be given by the default macros.
  22.210 ++ */
  22.211 ++
  22.212 ++#ifdef CONFIG_XEN
  22.213 ++#define KEXEC_ARCH_HAS_PAGE_MACROS
  22.214 ++#define kexec_page_to_pfn(page)  pfn_to_mfn(page_to_pfn(page))
  22.215 ++#define kexec_pfn_to_page(pfn)   pfn_to_page(mfn_to_pfn(pfn))
  22.216 ++#define kexec_virt_to_phys(addr) virt_to_machine(addr)
  22.217 ++#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr))
  22.218 ++#endif
  22.219 ++
  22.220 + #endif /* __ASSEMBLY__ */
  22.221 + 
  22.222 + #endif /* _X86_64_KEXEC_H */
    23.1 --- a/patches/linux-2.6.16.33/series	Thu Nov 30 10:57:28 2006 +0000
    23.2 +++ b/patches/linux-2.6.16.33/series	Thu Nov 30 13:05:27 2006 +0000
    23.3 @@ -1,3 +1,12 @@
    23.4 +kexec-generic.patch
    23.5 +git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch
    23.6 +git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch
    23.7 +git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
    23.8 +linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
    23.9 +linux-2.6.19-rc1-kexec-xen-i386.patch
   23.10 +git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch
   23.11 +linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
   23.12 +linux-2.6.19-rc1-kexec-xen-x86_64.patch
   23.13  blktap-aio-16_03_06.patch
   23.14  device_bind.patch
   23.15  fix-hz-suspend.patch
   23.16 @@ -22,6 +31,7 @@ xen-hotplug.patch
   23.17  xenoprof-generic.patch
   23.18  x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
   23.19  x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
   23.20 +git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch
   23.21  x86-elfnote-as-preprocessor-macro.patch
   23.22  vsnprintf.patch
   23.23  kasprintf.patch
    24.1 --- a/xen/arch/ia64/xen/Makefile	Thu Nov 30 10:57:28 2006 +0000
    24.2 +++ b/xen/arch/ia64/xen/Makefile	Thu Nov 30 13:05:27 2006 +0000
    24.3 @@ -1,3 +1,5 @@
    24.4 +obj-y += machine_kexec.o
    24.5 +obj-y += crash.o
    24.6  obj-y += acpi.o
    24.7  obj-y += dom0_ops.o
    24.8  obj-y += domain.o
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/xen/arch/ia64/xen/crash.c	Thu Nov 30 13:05:27 2006 +0000
    25.3 @@ -0,0 +1,19 @@
    25.4 +#include <xen/lib.h>       /* for printk() used in stub */
    25.5 +#include <xen/types.h>
    25.6 +#include <public/kexec.h>
    25.7 +
    25.8 +void machine_crash_shutdown(void)
    25.9 +{
   25.10 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   25.11 +}
   25.12 +
   25.13 +/*
   25.14 + * Local variables:
   25.15 + * mode: C
   25.16 + * c-set-style: "BSD"
   25.17 + * c-basic-offset: 4
   25.18 + * tab-width: 4
   25.19 + * indent-tabs-mode: nil
   25.20 + * End:
   25.21 + */
   25.22 +
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/xen/arch/ia64/xen/machine_kexec.c	Thu Nov 30 13:05:27 2006 +0000
    26.3 @@ -0,0 +1,34 @@
    26.4 +#include <xen/lib.h>       /* for printk() used in stubs */
    26.5 +#include <xen/types.h>
    26.6 +#include <public/kexec.h>
    26.7 +
    26.8 +int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
    26.9 +{
   26.10 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   26.11 +    return -1;
   26.12 +}
   26.13 +
   26.14 +void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image)
   26.15 +{
   26.16 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   26.17 +}
   26.18 +
   26.19 +void machine_kexec(xen_kexec_image_t *image)
   26.20 +{
   26.21 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   26.22 +}
   26.23 +
   26.24 +void machine_shutdown(xen_kexec_image_t *image)
   26.25 +{
   26.26 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   26.27 +}
   26.28 +
   26.29 +/*
   26.30 + * Local variables:
   26.31 + * mode: C
   26.32 + * c-set-style: "BSD"
   26.33 + * c-basic-offset: 4
   26.34 + * tab-width: 4
   26.35 + * indent-tabs-mode: nil
   26.36 + * End:
   26.37 + */
    27.1 --- a/xen/arch/powerpc/Makefile	Thu Nov 30 10:57:28 2006 +0000
    27.2 +++ b/xen/arch/powerpc/Makefile	Thu Nov 30 13:05:27 2006 +0000
    27.3 @@ -40,6 +40,8 @@ obj-y += smp-tbsync.o
    27.4  obj-y += sysctl.o
    27.5  obj-y += time.o
    27.6  obj-y += usercopy.o
    27.7 +obj-y += machine_kexec.o
    27.8 +obj-y += crash.o
    27.9  
   27.10  obj-$(debug) += 0opt.o
   27.11  obj-$(crash_debug) += gdbstub.o
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/xen/arch/powerpc/crash.c	Thu Nov 30 13:05:27 2006 +0000
    28.3 @@ -0,0 +1,19 @@
    28.4 +#include <xen/lib.h>       /* for printk() used in stub */
    28.5 +#include <xen/types.h>
    28.6 +#include <public/kexec.h>
    28.7 +
    28.8 +void machine_crash_shutdown(void)
    28.9 +{
   28.10 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   28.11 +}
   28.12 +
   28.13 +/*
   28.14 + * Local variables:
   28.15 + * mode: C
   28.16 + * c-set-style: "BSD"
   28.17 + * c-basic-offset: 4
   28.18 + * tab-width: 4
   28.19 + * indent-tabs-mode: nil
   28.20 + * End:
   28.21 + */
   28.22 +
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/xen/arch/powerpc/machine_kexec.c	Thu Nov 30 13:05:27 2006 +0000
    29.3 @@ -0,0 +1,34 @@
    29.4 +#include <xen/lib.h>       /* for printk() used in stubs */
    29.5 +#include <xen/types.h>
    29.6 +#include <public/kexec.h>
    29.7 +
    29.8 +int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
    29.9 +{
   29.10 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   29.11 +    return -1;
   29.12 +}
   29.13 +
   29.14 +void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image)
   29.15 +{
   29.16 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   29.17 +}
   29.18 +
   29.19 +void machine_kexec(xen_kexec_image_t *image)
   29.20 +{
   29.21 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   29.22 +}
   29.23 +
   29.24 +void machine_shutdown(xen_kexec_image_t *image)
   29.25 +{
   29.26 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   29.27 +}
   29.28 +
   29.29 +/*
   29.30 + * Local variables:
   29.31 + * mode: C
   29.32 + * c-set-style: "BSD"
   29.33 + * c-basic-offset: 4
   29.34 + * tab-width: 4
   29.35 + * indent-tabs-mode: nil
   29.36 + * End:
   29.37 + */
    30.1 --- a/xen/arch/x86/Makefile	Thu Nov 30 10:57:28 2006 +0000
    30.2 +++ b/xen/arch/x86/Makefile	Thu Nov 30 13:05:27 2006 +0000
    30.3 @@ -43,6 +43,8 @@ obj-y += trampoline.o
    30.4  obj-y += traps.o
    30.5  obj-y += usercopy.o
    30.6  obj-y += x86_emulate.o
    30.7 +obj-y += machine_kexec.o
    30.8 +obj-y += crash.o
    30.9  
   30.10  obj-$(crash_debug) += gdbstub.o
   30.11  
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/xen/arch/x86/crash.c	Thu Nov 30 13:05:27 2006 +0000
    31.3 @@ -0,0 +1,128 @@
    31.4 +/******************************************************************************
    31.5 + * crash.c
    31.6 + *
    31.7 + * Based heavily on arch/i386/kernel/crash.c from Linux 2.6.16
    31.8 + *
    31.9 + * Xen port written by:
   31.10 + * - Simon 'Horms' Horman <horms@verge.net.au>
   31.11 + * - Magnus Damm <magnus@valinux.co.jp>
   31.12 + */
   31.13 +
   31.14 +#include <asm/atomic.h>
   31.15 +#include <asm/elf.h>
   31.16 +#include <asm/percpu.h>
   31.17 +#include <asm/kexec.h>
   31.18 +#include <xen/types.h>
   31.19 +#include <xen/irq.h>
   31.20 +#include <asm/ipi.h>
   31.21 +#include <asm/nmi.h>
   31.22 +#include <xen/string.h>
   31.23 +#include <xen/elf.h>
   31.24 +#include <xen/elfcore.h>
   31.25 +#include <xen/smp.h>
   31.26 +#include <xen/delay.h>
   31.27 +#include <xen/perfc.h>
   31.28 +#include <xen/kexec.h>
   31.29 +#include <xen/sched.h>
   31.30 +#include <public/xen.h>
   31.31 +#include <asm/hvm/hvm.h>
   31.32 +
   31.33 +#ifdef CONFIG_SMP
   31.34 +static atomic_t waiting_for_crash_ipi;
   31.35 +
   31.36 +static int crash_nmi_callback(struct cpu_user_regs *regs, int cpu)
   31.37 +{
   31.38 +    /* Don't do anything if this handler is invoked on crashing cpu.
   31.39 +     * Otherwise, system will completely hang. Crashing cpu can get
   31.40 +     * an NMI if system was initially booted with nmi_watchdog parameter.
   31.41 +     */
   31.42 +    if ( cpu == crashing_cpu )
   31.43 +        return 1;
   31.44 +    local_irq_disable();
   31.45 +
   31.46 +    machine_crash_save_cpu();
   31.47 +    disable_local_APIC();
   31.48 +    atomic_dec(&waiting_for_crash_ipi);
   31.49 +    hvm_disable();
   31.50 +
   31.51 +    for ( ; ; )
   31.52 +        __asm__ __volatile__ ( "hlt" );
   31.53 +
   31.54 +    return 1;
   31.55 +}
   31.56 +
   31.57 +/*
   31.58 + * By using the NMI code instead of a vector we just sneak thru the
   31.59 + * word generator coming out with just what we want.  AND it does
   31.60 + * not matter if clustered_apic_mode is set or not.
   31.61 + */
   31.62 +static void smp_send_nmi_allbutself(void)
   31.63 +{
   31.64 +    cpumask_t allbutself = cpu_online_map;
   31.65 +
   31.66 +    cpu_clear(smp_processor_id(), allbutself);
   31.67 +    send_IPI_mask(allbutself, APIC_DM_NMI);
   31.68 +}
   31.69 +
   31.70 +static void nmi_shootdown_cpus(void)
   31.71 +{
   31.72 +    unsigned long msecs;
   31.73 +
   31.74 +    atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
   31.75 +    /* Would it be better to replace the trap vector here? */
   31.76 +    set_nmi_callback(crash_nmi_callback);
   31.77 +    /* Ensure the new callback function is set before sending
   31.78 +     * out the NMI
   31.79 +     */
   31.80 +    wmb();
   31.81 +
   31.82 +    smp_send_nmi_allbutself();
   31.83 +
   31.84 +    msecs = 1000; /* Wait at most a second for the other cpus to stop */
   31.85 +    while ( (atomic_read(&waiting_for_crash_ipi) > 0) && msecs )
   31.86 +    {
   31.87 +        mdelay(1);
   31.88 +        msecs--;
   31.89 +    }
   31.90 +
   31.91 +    /* Leave the nmi callback set */
   31.92 +    disable_local_APIC();
   31.93 +}
   31.94 +#endif
   31.95 +
   31.96 +static void crash_save_xen_notes(void)
   31.97 +{
   31.98 +    crash_xen_info_t *info;
   31.99 +
  31.100 +    info = machine_crash_save_info();
  31.101 +
  31.102 +    info->dom0_pfn_to_mfn_frame_list_list = \
  31.103 +        dom0->shared_info->arch.pfn_to_mfn_frame_list_list;
  31.104 +}
  31.105 +
  31.106 +void machine_crash_shutdown(void)
  31.107 +{
  31.108 +    printk("machine_crash_shutdown: %d\n", smp_processor_id());
  31.109 +    local_irq_disable();
  31.110 +
  31.111 +#ifdef CONFIG_SMP
  31.112 +    nmi_shootdown_cpus();
  31.113 +#endif
  31.114 +
  31.115 +#ifdef CONFIG_X86_IO_APIC
  31.116 +    disable_IO_APIC();
  31.117 +#endif
  31.118 +    hvm_disable();
  31.119 +
  31.120 +    crash_save_xen_notes();
  31.121 +}
  31.122 +
  31.123 +/*
  31.124 + * Local variables:
  31.125 + * mode: C
  31.126 + * c-set-style: "BSD"
  31.127 + * c-basic-offset: 4
  31.128 + * tab-width: 4
  31.129 + * indent-tabs-mode: nil
  31.130 + * End:
  31.131 + */
    32.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.2 +++ b/xen/arch/x86/machine_kexec.c	Thu Nov 30 13:05:27 2006 +0000
    32.3 @@ -0,0 +1,105 @@
    32.4 +/******************************************************************************
    32.5 + * machine_kexec.c
    32.6 + *
    32.7 + * Xen port written by:
    32.8 + * - Simon 'Horms' Horman <horms@verge.net.au>
    32.9 + * - Magnus Damm <magnus@valinux.co.jp>
   32.10 + */
   32.11 +
   32.12 +#include <xen/lib.h>
   32.13 +#include <asm/irq.h>
   32.14 +#include <asm/page.h>
   32.15 +#include <asm/flushtlb.h>
   32.16 +#include <xen/smp.h>
   32.17 +#include <xen/nmi.h>
   32.18 +#include <xen/types.h>
   32.19 +#include <xen/console.h>
   32.20 +#include <xen/kexec.h>
   32.21 +#include <asm/kexec.h>
   32.22 +#include <xen/domain_page.h>
   32.23 +#include <asm/fixmap.h>
   32.24 +#include <asm/hvm/hvm.h>
   32.25 +
   32.26 +int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
   32.27 +{
   32.28 +    unsigned long prev_ma = 0;
   32.29 +    int fix_base = FIX_KEXEC_BASE_0 + (slot * (KEXEC_XEN_NO_PAGES >> 1));
   32.30 +    int k;
   32.31 +
   32.32 +    /* setup fixmap to point to our pages and record the virtual address
   32.33 +     * in every odd index in page_list[].
   32.34 +     */
   32.35 +
   32.36 +    for ( k = 0; k < KEXEC_XEN_NO_PAGES; k++ )
   32.37 +    {
   32.38 +        if ( (k & 1) == 0 )
   32.39 +        {
   32.40 +            /* Even pages: machine address. */
   32.41 +            prev_ma = image->page_list[k];
   32.42 +        }
   32.43 +        else
   32.44 +        {
   32.45 +            /* Odd pages: va for previous ma. */
   32.46 +            set_fixmap(fix_base + (k >> 1), prev_ma);
   32.47 +            image->page_list[k] = fix_to_virt(fix_base + (k >> 1));
   32.48 +        }
   32.49 +    }
   32.50 +
   32.51 +    return 0;
   32.52 +}
   32.53 +
   32.54 +void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image)
   32.55 +{
   32.56 +}
   32.57 +
   32.58 +static void __machine_shutdown(void *data)
   32.59 +{
   32.60 +    xen_kexec_image_t *image = (xen_kexec_image_t *)data;
   32.61 +
   32.62 +    watchdog_disable();
   32.63 +    console_start_sync();
   32.64 +
   32.65 +    smp_send_stop();
   32.66 +
   32.67 +#ifdef CONFIG_X86_IO_APIC
   32.68 +    disable_IO_APIC();
   32.69 +#endif
   32.70 +    hvm_disable();
   32.71 +
   32.72 +    machine_kexec(image);
   32.73 +}
   32.74 +
   32.75 +void machine_shutdown(xen_kexec_image_t *image)
   32.76 +{
   32.77 +    int reboot_cpu_id;
   32.78 +    cpumask_t reboot_cpu;
   32.79 +
   32.80 +    reboot_cpu_id = 0;
   32.81 +
   32.82 +    if ( !cpu_isset(reboot_cpu_id, cpu_online_map) )
   32.83 +        reboot_cpu_id = smp_processor_id();
   32.84 +
   32.85 +    if ( reboot_cpu_id != smp_processor_id() )
   32.86 +    {
   32.87 +        cpus_clear(reboot_cpu);
   32.88 +        cpu_set(reboot_cpu_id, reboot_cpu);
   32.89 +        on_selected_cpus(reboot_cpu, __machine_shutdown, image, 1, 0);
   32.90 +        for (;;)
   32.91 +                ; /* nothing */
   32.92 +    }
   32.93 +    else
   32.94 +    {
   32.95 +        __machine_shutdown(image);
   32.96 +    }
   32.97 +    BUG();
   32.98 +}
   32.99 +
  32.100 +/*
  32.101 + * Local variables:
  32.102 + * mode: C
  32.103 + * c-set-style: "BSD"
  32.104 + * c-basic-offset: 4
  32.105 + * tab-width: 4
  32.106 + * indent-tabs-mode: nil
  32.107 + * End:
  32.108 + */
    33.1 --- a/xen/arch/x86/setup.c	Thu Nov 30 10:57:28 2006 +0000
    33.2 +++ b/xen/arch/x86/setup.c	Thu Nov 30 13:05:27 2006 +0000
    33.3 @@ -27,6 +27,7 @@
    33.4  #include <asm/shadow.h>
    33.5  #include <asm/e820.h>
    33.6  #include <acm/acm_hooks.h>
    33.7 +#include <xen/kexec.h>
    33.8  
    33.9  extern void dmi_scan_machine(void);
   33.10  extern void generic_apic_probe(void);
   33.11 @@ -273,6 +274,20 @@ static void srat_detect_node(int cpu)
   33.12          printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
   33.13  }
   33.14  
   33.15 +void __init move_memory(unsigned long dst,
   33.16 +                          unsigned long src_start, unsigned long src_end)
   33.17 +{
   33.18 +#if defined(CONFIG_X86_32)
   33.19 +    memmove((void *)dst,            /* use low mapping */
   33.20 +            (void *)src_start,      /* use low mapping */
   33.21 +            src_end - src_start);
   33.22 +#elif defined(CONFIG_X86_64)
   33.23 +    memmove(__va(dst),
   33.24 +            __va(src_start),
   33.25 +            src_end - src_start);
   33.26 +#endif
   33.27 +}
   33.28 +
   33.29  void __init __start_xen(multiboot_info_t *mbi)
   33.30  {
   33.31      char __cmdline[] = "", *cmdline = __cmdline;
   33.32 @@ -284,6 +299,7 @@ void __init __start_xen(multiboot_info_t
   33.33      unsigned long nr_pages, modules_length;
   33.34      paddr_t s, e;
   33.35      int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
   33.36 +    xen_kexec_reserve_t crash_area;
   33.37      struct ns16550_defaults ns16550 = {
   33.38          .data_bits = 8,
   33.39          .parity    = 'n',
   33.40 @@ -415,15 +431,8 @@ void __init __start_xen(multiboot_info_t
   33.41          initial_images_start = xenheap_phys_end;
   33.42      initial_images_end = initial_images_start + modules_length;
   33.43  
   33.44 -#if defined(CONFIG_X86_32)
   33.45 -    memmove((void *)initial_images_start,  /* use low mapping */
   33.46 -            (void *)mod[0].mod_start,      /* use low mapping */
   33.47 -            mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
   33.48 -#elif defined(CONFIG_X86_64)
   33.49 -    memmove(__va(initial_images_start),
   33.50 -            __va(mod[0].mod_start),
   33.51 -            mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
   33.52 -#endif
   33.53 +    move_memory(initial_images_start, 
   33.54 +                mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
   33.55  
   33.56      /* Initialise boot-time allocator with all RAM situated after modules. */
   33.57      xenheap_phys_start = init_boot_allocator(__pa(&_end));
   33.58 @@ -471,10 +480,57 @@ void __init __start_xen(multiboot_info_t
   33.59  #endif
   33.60      }
   33.61  
   33.62 +    machine_kexec_reserved(&crash_area);
   33.63 +    if ( crash_area.size > 0 )
   33.64 +    {
   33.65 +        unsigned long kdump_start, kdump_size, k;
   33.66 +
   33.67 +        /* Mark images pages as free for now. */
   33.68 +
   33.69 +        init_boot_pages(initial_images_start, initial_images_end);
   33.70 +
   33.71 +        kdump_start = crash_area.start;
   33.72 +        kdump_size = crash_area.size;
   33.73 +
   33.74 +        printk("Kdump: %luMB (%lukB) at 0x%lx\n",
   33.75 +               kdump_size >> 20,
   33.76 +               kdump_size >> 10,
   33.77 +               kdump_start);
   33.78 +
   33.79 +        if ( (kdump_start & ~PAGE_MASK) || (kdump_size & ~PAGE_MASK) )
   33.80 +            panic("Kdump parameters not page aligned\n");
   33.81 +
   33.82 +        kdump_start >>= PAGE_SHIFT;
   33.83 +        kdump_size >>= PAGE_SHIFT;
   33.84 +
   33.85 +        /* allocate pages for Kdump memory area */
   33.86 +
   33.87 +        k = alloc_boot_pages_at(kdump_size, kdump_start);
   33.88 +
   33.89 +        if ( k != kdump_start )
   33.90 +            panic("Unable to reserve Kdump memory\n");
   33.91 +
   33.92 +        /* allocate pages for relocated initial images */
   33.93 +
   33.94 +        k = ((initial_images_end - initial_images_start) & ~PAGE_MASK) ? 1 : 0;
   33.95 +        k += (initial_images_end - initial_images_start) >> PAGE_SHIFT;
   33.96 +
   33.97 +        k = alloc_boot_pages(k, 1);
   33.98 +
   33.99 +        if ( !k )
  33.100 +            panic("Unable to allocate initial images memory\n");
  33.101 +
  33.102 +        move_memory(k << PAGE_SHIFT, initial_images_start, initial_images_end);
  33.103 +
  33.104 +        initial_images_end -= initial_images_start;
  33.105 +        initial_images_start = k << PAGE_SHIFT;
  33.106 +        initial_images_end += initial_images_start;
  33.107 +    }
  33.108 +
  33.109      memguard_init();
  33.110      percpu_guard_areas();
  33.111  
  33.112 -    printk("System RAM: %luMB (%lukB)\n", 
  33.113 +    printk("System RAM: %luMB (%lukB)\n",
  33.114             nr_pages >> (20 - PAGE_SHIFT),
  33.115             nr_pages << (PAGE_SHIFT - 10));
  33.116      total_pages = nr_pages;
    34.1 --- a/xen/arch/x86/traps.c	Thu Nov 30 10:57:28 2006 +0000
    34.2 +++ b/xen/arch/x86/traps.c	Thu Nov 30 13:05:27 2006 +0000
    34.3 @@ -45,6 +45,7 @@
    34.4  #include <xen/iocap.h>
    34.5  #include <xen/nmi.h>
    34.6  #include <xen/version.h>
    34.7 +#include <xen/kexec.h>
    34.8  #include <asm/shadow.h>
    34.9  #include <asm/system.h>
   34.10  #include <asm/io.h>
   34.11 @@ -1633,6 +1634,7 @@ static void unknown_nmi_error(unsigned c
   34.12          printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
   34.13          printk("Dazed and confused, but trying to continue\n");
   34.14          printk("Do you have a strange power saving mode enabled?\n");
   34.15 +        machine_crash_kexec();
   34.16      }
   34.17  }
   34.18  
    35.1 --- a/xen/arch/x86/x86_32/entry.S	Thu Nov 30 10:57:28 2006 +0000
    35.2 +++ b/xen/arch/x86/x86_32/entry.S	Thu Nov 30 13:05:27 2006 +0000
    35.3 @@ -659,6 +659,7 @@ ENTRY(hypercall_table)
    35.4          .long do_hvm_op
    35.5          .long do_sysctl             /* 35 */
    35.6          .long do_domctl
    35.7 +        .long do_kexec_op
    35.8          .rept NR_hypercalls-((.-hypercall_table)/4)
    35.9          .long do_ni_hypercall
   35.10          .endr
   35.11 @@ -701,6 +702,7 @@ ENTRY(hypercall_args_table)
   35.12          .byte 2 /* do_hvm_op            */
   35.13          .byte 1 /* do_sysctl            */  /* 35 */
   35.14          .byte 1 /* do_domctl            */
   35.15 +        .byte 2 /* do_kexec_op          */
   35.16          .rept NR_hypercalls-(.-hypercall_args_table)
   35.17          .byte 0 /* do_ni_hypercall      */
   35.18          .endr
    36.1 --- a/xen/arch/x86/x86_64/entry.S	Thu Nov 30 10:57:28 2006 +0000
    36.2 +++ b/xen/arch/x86/x86_64/entry.S	Thu Nov 30 13:05:27 2006 +0000
    36.3 @@ -559,6 +559,7 @@ ENTRY(hypercall_table)
    36.4          .quad do_hvm_op
    36.5          .quad do_sysctl             /* 35 */
    36.6          .quad do_domctl
    36.7 +        .quad do_kexec_op
    36.8          .rept NR_hypercalls-((.-hypercall_table)/8)
    36.9          .quad do_ni_hypercall
   36.10          .endr
   36.11 @@ -601,6 +602,7 @@ ENTRY(hypercall_args_table)
   36.12          .byte 2 /* do_hvm_op            */
   36.13          .byte 1 /* do_sysctl            */  /* 35 */
   36.14          .byte 1 /* do_domctl            */
    36.15 +        .byte 2 /* do_kexec_op          */
   36.16          .rept NR_hypercalls-(.-hypercall_args_table)
   36.17          .byte 0 /* do_ni_hypercall      */
   36.18          .endr
    37.1 --- a/xen/common/Makefile	Thu Nov 30 10:57:28 2006 +0000
    37.2 +++ b/xen/common/Makefile	Thu Nov 30 13:05:27 2006 +0000
    37.3 @@ -7,6 +7,7 @@ obj-y += event_channel.o
    37.4  obj-y += grant_table.o
    37.5  obj-y += kernel.o
    37.6  obj-y += keyhandler.o
    37.7 +obj-y += kexec.o
    37.8  obj-y += lib.o
    37.9  obj-y += memory.o
   37.10  obj-y += multicall.o
    38.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.2 +++ b/xen/common/kexec.c	Thu Nov 30 13:05:27 2006 +0000
    38.3 @@ -0,0 +1,359 @@
    38.4 +/******************************************************************************
     38.5 + * kexec.c - Architecture independent kexec code for Xen
    38.6 + *
    38.7 + * Xen port written by:
    38.8 + * - Simon 'Horms' Horman <horms@verge.net.au>
    38.9 + * - Magnus Damm <magnus@valinux.co.jp>
   38.10 + */
   38.11 +
   38.12 +#include <asm/kexec.h>
   38.13 +#include <xen/lib.h>
   38.14 +#include <xen/ctype.h>
   38.15 +#include <xen/errno.h>
   38.16 +#include <xen/guest_access.h>
   38.17 +#include <xen/sched.h>
   38.18 +#include <xen/types.h>
   38.19 +#include <xen/kexec.h>
   38.20 +#include <xen/keyhandler.h>
   38.21 +#include <public/kexec.h>
   38.22 +#include <xen/cpumask.h>
   38.23 +#include <asm/atomic.h>
   38.24 +#include <xen/spinlock.h>
   38.25 +#include <xen/version.h>
   38.26 +#include <public/elfnote.h>
   38.27 +
   38.28 +static char opt_crashkernel[32] = "";
   38.29 +string_param("crashkernel", opt_crashkernel);
   38.30 +
   38.31 +DEFINE_PER_CPU (crash_note_t, crash_notes);
   38.32 +cpumask_t crash_saved_cpus;
   38.33 +int crashing_cpu;
   38.34 +
   38.35 +xen_kexec_image_t kexec_image[KEXEC_IMAGE_NR];
   38.36 +
   38.37 +#define KEXEC_FLAG_DEFAULT_POS   (KEXEC_IMAGE_NR + 0)
   38.38 +#define KEXEC_FLAG_CRASH_POS     (KEXEC_IMAGE_NR + 1)
   38.39 +#define KEXEC_FLAG_IN_PROGRESS   (KEXEC_IMAGE_NR + 2)
   38.40 +
   38.41 +unsigned long kexec_flags = 0; /* the lowest bits are for KEXEC_IMAGE... */
   38.42 +
   38.43 +spinlock_t kexec_lock = SPIN_LOCK_UNLOCKED;
   38.44 +
   38.45 +static void one_cpu_only(void)
   38.46 +{
   38.47 +   /* Only allow the first cpu to continue - force other cpus to spin */
   38.48 +    if ( test_and_set_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
   38.49 +    {
   38.50 +        while (1);
   38.51 +    }
   38.52 +}
   38.53 +
   38.54 +/* Save the registers in the per-cpu crash note buffer */
   38.55 +
   38.56 +void machine_crash_save_cpu(void)
   38.57 +{
   38.58 +    int cpu = smp_processor_id();
   38.59 +    crash_note_t *cntp;
   38.60 +
   38.61 +    if ( !cpu_test_and_set(cpu, crash_saved_cpus) )
   38.62 +    {
   38.63 +        cntp = &per_cpu(crash_notes, cpu);
   38.64 +        elf_core_save_regs(&cntp->core.desc.desc.pr_reg,
   38.65 +                           &cntp->xen_regs.desc.desc);
   38.66 +
   38.67 +        /* setup crash "CORE" note */
   38.68 +        setup_crash_note(cntp, core, CORE_STR, CORE_STR_LEN, NT_PRSTATUS);
   38.69 +
   38.70 +        /* setup crash note "Xen", XEN_ELFNOTE_CRASH_REGS */
   38.71 +        setup_crash_note(cntp, xen_regs, XEN_STR, XEN_STR_LEN,
   38.72 +                         XEN_ELFNOTE_CRASH_REGS);
   38.73 +    }
   38.74 +}
   38.75 +
   38.76 +/* Setup the single Xen specific info crash note */
   38.77 +
   38.78 +crash_xen_info_t *machine_crash_save_info(void)
   38.79 +{
   38.80 +    int cpu = smp_processor_id();
   38.81 +    crash_note_t *cntp;
   38.82 +    crash_xen_info_t *info;
   38.83 +
   38.84 +    BUG_ON(!cpu_test_and_set(cpu, crash_saved_cpus));
   38.85 +
   38.86 +    cntp = &per_cpu(crash_notes, cpu);
   38.87 +
   38.88 +    /* setup crash note "Xen", XEN_ELFNOTE_CRASH_INFO */
   38.89 +    setup_crash_note(cntp, xen_info, XEN_STR, XEN_STR_LEN,
   38.90 +                     XEN_ELFNOTE_CRASH_INFO);
   38.91 +
   38.92 +    info = &cntp->xen_info.desc.desc;
   38.93 +
   38.94 +    info->xen_major_version = xen_major_version();
   38.95 +    info->xen_minor_version = xen_minor_version();
   38.96 +    info->xen_extra_version = __pa(xen_extra_version());
   38.97 +    info->xen_changeset = __pa(xen_changeset());
   38.98 +    info->xen_compiler = __pa(xen_compiler());
   38.99 +    info->xen_compile_date = __pa(xen_compile_date());
  38.100 +    info->xen_compile_time = __pa(xen_compile_time());
  38.101 +    info->tainted = tainted;
  38.102 +
  38.103 +    return info;
  38.104 +}
  38.105 +
  38.106 +void machine_crash_kexec(void)
  38.107 +{
  38.108 +    int pos;
  38.109 +    xen_kexec_image_t *image;
  38.110 +
  38.111 +    one_cpu_only();
  38.112 +
  38.113 +    machine_crash_save_cpu();
  38.114 +    crashing_cpu = smp_processor_id();
  38.115 +
  38.116 +    machine_crash_shutdown();
  38.117 +
  38.118 +    pos = (test_bit(KEXEC_FLAG_CRASH_POS, &kexec_flags) != 0);
  38.119 +
  38.120 +    if ( test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) )
  38.121 +    {
  38.122 +        image = &kexec_image[KEXEC_IMAGE_CRASH_BASE + pos];
  38.123 +        machine_kexec(image); /* Does not return */
  38.124 +    }
  38.125 +
  38.126 +    while (1); /* No image available - just spin */
  38.127 +}
  38.128 +
  38.129 +static void do_crashdump_trigger(unsigned char key)
  38.130 +{
  38.131 +	printk("triggering crashdump\n");
  38.132 +	machine_crash_kexec();
  38.133 +}
  38.134 +
  38.135 +static __init int register_crashdump_trigger(void)
  38.136 +{
  38.137 +	register_keyhandler('c', do_crashdump_trigger, "trigger a crashdump");
  38.138 +	return 0;
  38.139 +}
  38.140 +__initcall(register_crashdump_trigger);
  38.141 +
  38.142 +void machine_kexec_reserved(xen_kexec_reserve_t *reservation)
  38.143 +{
  38.144 +    unsigned long start, size;
  38.145 +    char *str = opt_crashkernel;
  38.146 +
  38.147 +    memset(reservation, 0, sizeof(*reservation));
  38.148 +
  38.149 +    size = parse_size_and_unit(str, &str);
  38.150 +    if ( *str == '@' )
  38.151 +        start = parse_size_and_unit(str+1, NULL);
  38.152 +    else
  38.153 +        start = 0;
  38.154 +
  38.155 +    if ( start && size )
  38.156 +    {
  38.157 +        reservation->start = start;
  38.158 +        reservation->size = size;
  38.159 +    }
  38.160 +}
  38.161 +
  38.162 +static int kexec_get_reserve(xen_kexec_range_t *range)
  38.163 +{
  38.164 +    xen_kexec_reserve_t reservation;
  38.165 +
  38.166 +    machine_kexec_reserved(&reservation);
  38.167 +
  38.168 +    range->start = reservation.start;
  38.169 +    range->size = reservation.size;
  38.170 +    return 0;
  38.171 +}
  38.172 +
  38.173 +extern unsigned long _text, _end;
  38.174 +
  38.175 +static int kexec_get_xen(xen_kexec_range_t *range, int get_ma)
  38.176 +{
  38.177 +    if ( get_ma )
  38.178 +        range->start = virt_to_maddr(&_text);
  38.179 +    else
  38.180 +        range->start = (unsigned long) &_text;
  38.181 +
  38.182 +    range->size = &_end - &_text;
  38.183 +    return 0;
  38.184 +}
  38.185 +
  38.186 +static int kexec_get_cpu(xen_kexec_range_t *range)
  38.187 +{
  38.188 +    if ( range->nr < 0 || range->nr >= num_present_cpus() )
  38.189 +        return -EINVAL;
  38.190 +
  38.191 +    range->start = __pa((unsigned long)&per_cpu(crash_notes, range->nr));
  38.192 +    range->size = sizeof(crash_note_t);
  38.193 +    return 0;
  38.194 +}
  38.195 +
  38.196 +static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg)
  38.197 +{
  38.198 +    xen_kexec_range_t range;
  38.199 +    int ret = -EINVAL;
  38.200 +
  38.201 +    if ( unlikely(copy_from_guest(&range, uarg, 1)) )
  38.202 +        return -EFAULT;
  38.203 +
  38.204 +    switch ( range.range )
  38.205 +    {
  38.206 +    case KEXEC_RANGE_MA_CRASH:
  38.207 +        ret = kexec_get_reserve(&range);
  38.208 +        break;
  38.209 +    case KEXEC_RANGE_MA_XEN:
  38.210 +        ret = kexec_get_xen(&range, 1);
  38.211 +        break;
  38.212 +    case KEXEC_RANGE_VA_XEN:
  38.213 +        ret = kexec_get_xen(&range, 0);
  38.214 +        break;
  38.215 +    case KEXEC_RANGE_MA_CPU:
  38.216 +        ret = kexec_get_cpu(&range);
  38.217 +        break;
  38.218 +    }
  38.219 +
  38.220 +    if ( ret == 0 && unlikely(copy_to_guest(uarg, &range, 1)) )
  38.221 +        return -EFAULT;
  38.222 +
  38.223 +    return ret;
  38.224 +}
  38.225 +
  38.226 +static int kexec_load_get_bits(int type, int *base, int *bit)
  38.227 +{
  38.228 +    switch ( type )
  38.229 +    {
  38.230 +    case KEXEC_TYPE_DEFAULT:
  38.231 +        *base = KEXEC_IMAGE_DEFAULT_BASE;
  38.232 +        *bit = KEXEC_FLAG_DEFAULT_POS;
  38.233 +        break;
  38.234 +    case KEXEC_TYPE_CRASH:
  38.235 +        *base = KEXEC_IMAGE_CRASH_BASE;
  38.236 +        *bit = KEXEC_FLAG_CRASH_POS;
  38.237 +        break;
  38.238 +    default:
  38.239 +        return -1;
  38.240 +    }
  38.241 +    return 0;
  38.242 +}
  38.243 +
  38.244 +static int kexec_load_unload(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
  38.245 +{
  38.246 +    xen_kexec_load_t load;
  38.247 +    xen_kexec_image_t *image;
  38.248 +    int base, bit, pos;
  38.249 +    int ret = 0;
  38.250 +
  38.251 +    if ( unlikely(copy_from_guest(&load, uarg, 1)) )
  38.252 +        return -EFAULT;
  38.253 +
  38.254 +    if ( kexec_load_get_bits(load.type, &base, &bit) )
  38.255 +        return -EINVAL;
  38.256 +
  38.257 +    pos = (test_bit(bit, &kexec_flags) != 0);
  38.258 +
  38.259 +    /* Load the user data into an unused image */
  38.260 +    if ( op == KEXEC_CMD_kexec_load )
  38.261 +    {
  38.262 +        image = &kexec_image[base + !pos];
  38.263 +
  38.264 +        BUG_ON(test_bit((base + !pos), &kexec_flags)); /* must be free */
  38.265 +
  38.266 +        memcpy(image, &load.image, sizeof(*image));
  38.267 +
  38.268 +        if ( !(ret = machine_kexec_load(load.type, base + !pos, image)) )
  38.269 +        {
  38.270 +            /* Set image present bit */
  38.271 +            set_bit((base + !pos), &kexec_flags);
  38.272 +
  38.273 +            /* Make new image the active one */
  38.274 +            change_bit(bit, &kexec_flags);
  38.275 +        }
  38.276 +    }
  38.277 +
  38.278 +    /* Unload the old image if present and load successful */
  38.279 +    if ( ret == 0 && !test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
  38.280 +    {
  38.281 +        if ( test_and_clear_bit((base + pos), &kexec_flags) )
  38.282 +        {
  38.283 +            image = &kexec_image[base + pos];
  38.284 +            machine_kexec_unload(load.type, base + pos, image);
  38.285 +        }
  38.286 +    }
  38.287 +
  38.288 +    return ret;
  38.289 +}
  38.290 +
  38.291 +static int kexec_exec(XEN_GUEST_HANDLE(void) uarg)
  38.292 +{
  38.293 +    xen_kexec_exec_t exec;
  38.294 +    xen_kexec_image_t *image;
  38.295 +    int base, bit, pos;
  38.296 +
  38.297 +    if ( unlikely(copy_from_guest(&exec, uarg, 1)) )
  38.298 +        return -EFAULT;
  38.299 +
  38.300 +    if ( kexec_load_get_bits(exec.type, &base, &bit) )
  38.301 +        return -EINVAL;
  38.302 +
  38.303 +    pos = (test_bit(bit, &kexec_flags) != 0);
  38.304 +
  38.305 +    /* Only allow kexec/kdump into loaded images */
  38.306 +    if ( !test_bit(base + pos, &kexec_flags) )
  38.307 +        return -ENOENT;
  38.308 +
  38.309 +    switch (exec.type)
  38.310 +    {
  38.311 +    case KEXEC_TYPE_DEFAULT:
  38.312 +        image = &kexec_image[base + pos];
  38.313 +        one_cpu_only();
  38.314 +        machine_shutdown(image); /* Does not return */
  38.315 +        break;
  38.316 +    case KEXEC_TYPE_CRASH:
  38.317 +        machine_crash_kexec(); /* Does not return */
  38.318 +        break;
  38.319 +    }
  38.320 +
  38.321 +    return -EINVAL; /* never reached */
  38.322 +}
  38.323 +
  38.324 +long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
  38.325 +{
  38.326 +    unsigned long flags;
  38.327 +    int ret = -EINVAL;
  38.328 +
  38.329 +    if ( !IS_PRIV(current->domain) )
  38.330 +        return -EPERM;
  38.331 +
  38.332 +    switch ( op )
  38.333 +    {
  38.334 +    case KEXEC_CMD_kexec_get_range:
  38.335 +        ret = kexec_get_range(uarg);
  38.336 +        break;
  38.337 +    case KEXEC_CMD_kexec_load:
  38.338 +    case KEXEC_CMD_kexec_unload:
  38.339 +        spin_lock_irqsave(&kexec_lock, flags);
  38.340 +        if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags))
  38.341 +        {
  38.342 +            ret = kexec_load_unload(op, uarg);
  38.343 +        }
  38.344 +        spin_unlock_irqrestore(&kexec_lock, flags);
  38.345 +        break;
  38.346 +    case KEXEC_CMD_kexec:
  38.347 +        ret = kexec_exec(uarg);
  38.348 +        break;
  38.349 +    }
  38.350 +
  38.351 +    return ret;
  38.352 +}
  38.353 +
  38.354 +/*
  38.355 + * Local variables:
  38.356 + * mode: C
  38.357 + * c-set-style: "BSD"
  38.358 + * c-basic-offset: 4
  38.359 + * tab-width: 4
  38.360 + * indent-tabs-mode: nil
  38.361 + * End:
  38.362 + */
    39.1 --- a/xen/common/page_alloc.c	Thu Nov 30 10:57:28 2006 +0000
    39.2 +++ b/xen/common/page_alloc.c	Thu Nov 30 13:05:27 2006 +0000
    39.3 @@ -237,24 +237,35 @@ void init_boot_pages(paddr_t ps, paddr_t
    39.4      }
    39.5  }
    39.6  
    39.7 +unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at)
    39.8 +{
    39.9 +    unsigned long i;
   39.10 +
   39.11 +    for ( i = 0; i < nr_pfns; i++ )
   39.12 +        if ( allocated_in_map(pfn_at + i) )
   39.13 +             break;
   39.14 +
   39.15 +    if ( i == nr_pfns )
   39.16 +    {
   39.17 +        map_alloc(pfn_at, nr_pfns);
   39.18 +        return pfn_at;
   39.19 +    }
   39.20 +
   39.21 +    return 0;
   39.22 +}
   39.23 +
   39.24  unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align)
   39.25  {
   39.26 -    unsigned long pg, i;
   39.27 +    unsigned long pg, i = 0;
   39.28  
   39.29      for ( pg = 0; (pg + nr_pfns) < max_page; pg += pfn_align )
   39.30      {
   39.31 -        for ( i = 0; i < nr_pfns; i++ )
   39.32 -            if ( allocated_in_map(pg + i) )
   39.33 -                 break;
   39.34 -
   39.35 -        if ( i == nr_pfns )
   39.36 -        {
   39.37 -            map_alloc(pg, nr_pfns);
   39.38 -            return pg;
   39.39 -        }
   39.40 +        i = alloc_boot_pages_at(nr_pfns, pg);
   39.41 +        if (i != 0)
   39.42 +            break;
   39.43      }
   39.44  
   39.45 -    return 0;
   39.46 +    return i;
   39.47  }
   39.48  
   39.49  
    40.1 --- a/xen/drivers/char/console.c	Thu Nov 30 10:57:28 2006 +0000
    40.2 +++ b/xen/drivers/char/console.c	Thu Nov 30 13:05:27 2006 +0000
    40.3 @@ -27,6 +27,7 @@
    40.4  #include <xen/guest_access.h>
    40.5  #include <xen/shutdown.h>
    40.6  #include <xen/vga.h>
    40.7 +#include <xen/kexec.h>
    40.8  #include <asm/current.h>
    40.9  #include <asm/debugger.h>
   40.10  #include <asm/io.h>
   40.11 @@ -865,6 +866,8 @@ void panic(const char *fmt, ...)
   40.12  
   40.13      debugger_trap_immediate();
   40.14  
   40.15 +    machine_crash_kexec();
   40.16 +
   40.17      if ( opt_noreboot )
   40.18      {
   40.19          machine_halt();
    41.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.2 +++ b/xen/include/asm-ia64/elf.h	Thu Nov 30 13:05:27 2006 +0000
    41.3 @@ -0,0 +1,30 @@
    41.4 +#ifndef __IA64_ELF_H__
    41.5 +#define __IA64_ELF_H__
    41.6 +
    41.7 +#include <xen/lib.h>       /* for printk() used in stub */
    41.8 +
    41.9 +typedef struct {
   41.10 +    unsigned long dummy;
   41.11 +} ELF_Gregset;
   41.12 +
   41.13 +typedef struct {
   41.14 +    unsigned long dummy;
   41.15 +} crash_xen_core_t;
   41.16 +
   41.17 +extern inline void elf_core_save_regs(ELF_Gregset *core_regs, 
   41.18 +                                      crash_xen_core_t *xen_core_regs)
   41.19 +{
   41.20 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   41.21 +}
   41.22 +
   41.23 +#endif /* __IA64_ELF_H__ */
   41.24 +
   41.25 +/*
   41.26 + * Local variables:
   41.27 + * mode: C
   41.28 + * c-set-style: "BSD"
   41.29 + * c-basic-offset: 4
   41.30 + * tab-width: 4
   41.31 + * indent-tabs-mode: nil
   41.32 + * End:
   41.33 + */
    42.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    42.2 +++ b/xen/include/asm-ia64/kexec.h	Thu Nov 30 13:05:27 2006 +0000
    42.3 @@ -0,0 +1,25 @@
    42.4 +#ifndef __IA64_KEXEC_H__
    42.5 +#define __IA64_KEXEC_H__
    42.6 +
    42.7 +#include <xen/lib.h>       /* for printk() used in stub */
    42.8 +#include <xen/types.h>
    42.9 +#include <public/xen.h>
   42.10 +#include <xen/kexec.h>
   42.11 +
   42.12 +static inline void machine_kexec(xen_kexec_image_t *image)
   42.13 +{
   42.14 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   42.15 +}
   42.16 +
   42.17 +#endif /* __IA64_KEXEC_H__ */
   42.18 +
   42.19 +/*
   42.20 + * Local variables:
   42.21 + * mode: C
   42.22 + * c-set-style: "BSD"
   42.23 + * c-basic-offset: 4
   42.24 + * tab-width: 4
   42.25 + * indent-tabs-mode: nil
   42.26 + * End:
   42.27 + */
   42.28 +
    43.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    43.2 +++ b/xen/include/asm-powerpc/elf.h	Thu Nov 30 13:05:27 2006 +0000
    43.3 @@ -0,0 +1,30 @@
    43.4 +#ifndef _ASM_ELF_H__
    43.5 +#define _ASM_ELF_H__
    43.6 +
    43.7 +#include <xen/lib.h>       /* for printk() used in stub */
    43.8 +
    43.9 +typedef struct {
   43.10 +    unsigned long dummy;
   43.11 +} ELF_Gregset;
   43.12 +
   43.13 +typedef struct {
   43.14 +    unsigned long dummy;
   43.15 +} crash_xen_core_t;
   43.16 +
   43.17 +extern inline void elf_core_save_regs(ELF_Gregset *core_regs, 
   43.18 +                                      crash_xen_core_t *xen_core_regs)
   43.19 +{
   43.20 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   43.21 +}
   43.22 +
   43.23 +#endif /* _ASM_ELF_H__ */
   43.24 +
   43.25 +/*
   43.26 + * Local variables:
   43.27 + * mode: C
   43.28 + * c-set-style: "BSD"
   43.29 + * c-basic-offset: 4
   43.30 + * tab-width: 4
   43.31 + * indent-tabs-mode: nil
   43.32 + * End:
   43.33 + */
    44.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    44.2 +++ b/xen/include/asm-powerpc/kexec.h	Thu Nov 30 13:05:27 2006 +0000
    44.3 @@ -0,0 +1,25 @@
    44.4 +#ifndef _ASM_KEXEC_H__
    44.5 +#define _ASM_KEXEC_H__
    44.6 +
    44.7 +#include <xen/lib.h>       /* for printk() used in stub */
    44.8 +#include <xen/types.h>
    44.9 +#include <public/xen.h>
   44.10 +#include <xen/kexec.h>
   44.11 +
   44.12 +static inline void machine_kexec(xen_kexec_image_t *image)
   44.13 +{
   44.14 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   44.15 +}
   44.16 +
   44.17 +#endif /* _ASM_KEXEC_H__ */
   44.18 +
   44.19 +/*
   44.20 + * Local variables:
   44.21 + * mode: C
   44.22 + * c-set-style: "BSD"
   44.23 + * c-basic-offset: 4
   44.24 + * tab-width: 4
   44.25 + * indent-tabs-mode: nil
   44.26 + * End:
   44.27 + */
   44.28 +
    45.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    45.2 +++ b/xen/include/asm-x86/elf.h	Thu Nov 30 13:05:27 2006 +0000
    45.3 @@ -0,0 +1,24 @@
    45.4 +#ifndef __X86_ELF_H__
    45.5 +#define __X86_ELF_H__
    45.6 +
    45.7 +typedef struct {
    45.8 +    unsigned long cr0, cr2, cr3, cr4;
    45.9 +} crash_xen_core_t;
   45.10 +
   45.11 +#ifdef __x86_64__
   45.12 +#include <asm/x86_64/elf.h>
   45.13 +#else
   45.14 +#include <asm/x86_32/elf.h>
   45.15 +#endif
   45.16 +
   45.17 +#endif /* __X86_ELF_H__ */
   45.18 +
   45.19 +/*
   45.20 + * Local variables:
   45.21 + * mode: C
   45.22 + * c-set-style: "BSD"
   45.23 + * c-basic-offset: 4
   45.24 + * tab-width: 4
   45.25 + * indent-tabs-mode: nil
   45.26 + * End:
   45.27 + */
    46.1 --- a/xen/include/asm-x86/fixmap.h	Thu Nov 30 10:57:28 2006 +0000
    46.2 +++ b/xen/include/asm-x86/fixmap.h	Thu Nov 30 13:05:27 2006 +0000
    46.3 @@ -16,6 +16,7 @@
    46.4  #include <asm/apicdef.h>
    46.5  #include <asm/acpi.h>
    46.6  #include <asm/page.h>
    46.7 +#include <xen/kexec.h>
    46.8  
    46.9  /*
   46.10   * Here we define all the compile-time 'special' virtual
   46.11 @@ -36,6 +37,9 @@ enum fixed_addresses {
   46.12      FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
   46.13      FIX_HPET_BASE,
   46.14      FIX_CYCLONE_TIMER,
   46.15 +    FIX_KEXEC_BASE_0,
   46.16 +    FIX_KEXEC_BASE_END = FIX_KEXEC_BASE_0 \
   46.17 +      + ((KEXEC_XEN_NO_PAGES >> 1) * KEXEC_IMAGE_NR) - 1,
   46.18      __end_of_fixed_addresses
   46.19  };
   46.20  
    47.1 --- a/xen/include/asm-x86/hypercall.h	Thu Nov 30 10:57:28 2006 +0000
    47.2 +++ b/xen/include/asm-x86/hypercall.h	Thu Nov 30 13:05:27 2006 +0000
    47.3 @@ -6,6 +6,7 @@
    47.4  #define __ASM_X86_HYPERCALL_H__
    47.5  
    47.6  #include <public/physdev.h>
    47.7 +#include <xen/types.h>
    47.8  
    47.9  extern long
   47.10  do_event_channel_op_compat(
   47.11 @@ -87,6 +88,10 @@ extern long
   47.12  arch_do_vcpu_op(
   47.13      int cmd, struct vcpu *v, XEN_GUEST_HANDLE(void) arg);
   47.14  
   47.15 +extern int
   47.16 +do_kexec(
   47.17 +    unsigned long op, unsigned arg1, XEN_GUEST_HANDLE(void) uarg);
   47.18 +
   47.19  #ifdef __x86_64__
   47.20  
   47.21  extern long
    48.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    48.2 +++ b/xen/include/asm-x86/kexec.h	Thu Nov 30 13:05:27 2006 +0000
    48.3 @@ -0,0 +1,20 @@
    48.4 +#ifndef __X86_KEXEC_H__
    48.5 +#define __X86_KEXEC_H__
    48.6 +
    48.7 +#ifdef __x86_64__
    48.8 +#include <asm/x86_64/kexec.h>
    48.9 +#else
   48.10 +#include <asm/x86_32/kexec.h>
   48.11 +#endif
   48.12 +
   48.13 +#endif /* __X86_KEXEC_H__ */
   48.14 +
   48.15 +/*
   48.16 + * Local variables:
   48.17 + * mode: C
   48.18 + * c-set-style: "BSD"
   48.19 + * c-basic-offset: 4
   48.20 + * tab-width: 4
   48.21 + * indent-tabs-mode: nil
   48.22 + * End:
   48.23 + */
    49.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.2 +++ b/xen/include/asm-x86/x86_32/elf.h	Thu Nov 30 13:05:27 2006 +0000
    49.3 @@ -0,0 +1,72 @@
    49.4 +#ifndef __X86_32_ELF_H__
    49.5 +#define __X86_32_ELF_H__
    49.6 +
    49.7 +#include <asm/processor.h>
    49.8 +
    49.9 +typedef struct {
   49.10 +    unsigned long ebx;
   49.11 +    unsigned long ecx;
   49.12 +    unsigned long edx;
   49.13 +    unsigned long esi;
   49.14 +    unsigned long edi;
   49.15 +    unsigned long ebp;
   49.16 +    unsigned long eax;
   49.17 +    unsigned long ds;
   49.18 +    unsigned long es;
   49.19 +    unsigned long fs;
   49.20 +    unsigned long gs;
   49.21 +    unsigned long orig_eax;
   49.22 +    unsigned long eip;
   49.23 +    unsigned long cs;
   49.24 +    unsigned long eflags;
   49.25 +    unsigned long esp;
   49.26 +    unsigned long ss;
   49.27 +} ELF_Gregset;
   49.28 +
   49.29 +extern inline void elf_core_save_regs(ELF_Gregset *core_regs, 
   49.30 +                                      crash_xen_core_t *xen_core_regs)
   49.31 +{
   49.32 +    unsigned long tmp;
   49.33 +    /* Segment reads use movl so bits 31:16 of each slot are not stale. */
   49.34 +    asm volatile("movl %%ebx,%0" : "=m"(core_regs->ebx));
   49.35 +    asm volatile("movl %%ecx,%0" : "=m"(core_regs->ecx));
   49.36 +    asm volatile("movl %%edx,%0" : "=m"(core_regs->edx));
   49.37 +    asm volatile("movl %%esi,%0" : "=m"(core_regs->esi));
   49.38 +    asm volatile("movl %%edi,%0" : "=m"(core_regs->edi));
   49.39 +    asm volatile("movl %%ebp,%0" : "=m"(core_regs->ebp));
   49.40 +    asm volatile("movl %%eax,%0" : "=m"(core_regs->eax));
   49.41 +    asm volatile("movl %%ds, %%eax;" :"=a"(core_regs->ds));
   49.42 +    asm volatile("movl %%es, %%eax;" :"=a"(core_regs->es));
   49.43 +    asm volatile("movl %%fs, %%eax;" :"=a"(core_regs->fs));
   49.44 +    asm volatile("movl %%gs, %%eax;" :"=a"(core_regs->gs));
   49.45 +    /* orig_eax not filled in for now */
   49.46 +    core_regs->eip = (unsigned long)current_text_addr();
   49.47 +    asm volatile("movl %%cs, %%eax;" :"=a"(core_regs->cs));
   49.48 +    asm volatile("pushfl; popl %0" :"=m"(core_regs->eflags));
   49.49 +    asm volatile("movl %%esp,%0" : "=m"(core_regs->esp));
   49.50 +    asm volatile("movl %%ss, %%eax;" :"=a"(core_regs->ss));
   49.51 +
   49.52 +    asm volatile("mov %%cr0, %0" : "=r" (tmp) : );
   49.53 +    xen_core_regs->cr0 = tmp;
   49.54 +
   49.55 +    asm volatile("mov %%cr2, %0" : "=r" (tmp) : );
   49.56 +    xen_core_regs->cr2 = tmp;
   49.57 +
   49.58 +    asm volatile("mov %%cr3, %0" : "=r" (tmp) : );
   49.59 +    xen_core_regs->cr3 = tmp;
   49.60 +
   49.61 +    asm volatile("mov %%cr4, %0" : "=r" (tmp) : );
   49.62 +    xen_core_regs->cr4 = tmp;
   49.63 +}
   49.64 +
   49.65 +#endif /* __X86_32_ELF_H__ */
   49.66 +
   49.67 +/*
   49.68 + * Local variables:
   49.69 + * mode: C
   49.70 + * c-set-style: "BSD"
   49.71 + * c-basic-offset: 4
   49.72 + * tab-width: 4
   49.73 + * indent-tabs-mode: nil
   49.74 + * End:
   49.75 + */
    50.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    50.2 +++ b/xen/include/asm-x86/x86_32/kexec.h	Thu Nov 30 13:05:27 2006 +0000
    50.3 @@ -0,0 +1,40 @@
    50.4 +/******************************************************************************
    50.5 + * kexec.h
    50.6 + * 
    50.7 + * Based heavily on machine_kexec.c and kexec.h from Linux 2.6.19-rc1
    50.8 + *
    50.9 + */
   50.10 +  
   50.11 +#ifndef __X86_KEXEC_X86_32_H__
   50.12 +#define __X86_KEXEC_X86_32_H__
   50.13 +
   50.14 +#include <xen/types.h>
   50.15 +#include <xen/kexec.h>
   50.16 +#include <asm/fixmap.h>
   50.17 +
   50.18 +typedef asmlinkage void (*relocate_new_kernel_t)(
   50.19 +               unsigned long indirection_page,
   50.20 +               unsigned long page_list,
   50.21 +               unsigned long start_address,
   50.22 +               unsigned int has_pae);
   50.23 +
   50.24 +static inline void machine_kexec(xen_kexec_image_t *image)
   50.25 +{
   50.26 +    relocate_new_kernel_t rnk;
   50.27 +
   50.28 +    rnk = (relocate_new_kernel_t) image->page_list[1];
   50.29 +    (*rnk)(image->indirection_page, (unsigned long)image->page_list, 
   50.30 +           image->start_address, (unsigned long)cpu_has_pae);
   50.31 +}
   50.32 +
   50.33 +#endif /* __X86_KEXEC_X86_32_H__ */
   50.34 +
   50.35 +/*
   50.36 + * Local variables:
   50.37 + * mode: C
   50.38 + * c-set-style: "BSD"
   50.39 + * c-basic-offset: 4
   50.40 + * tab-width: 4
   50.41 + * indent-tabs-mode: nil
   50.42 + * End:
   50.43 + */
    51.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    51.2 +++ b/xen/include/asm-x86/x86_64/elf.h	Thu Nov 30 13:05:27 2006 +0000
    51.3 @@ -0,0 +1,92 @@
    51.4 +#ifndef __X86_64_ELF_H__
    51.5 +#define __X86_64_ELF_H__
    51.6 +
    51.7 +#include <asm/processor.h>
    51.8 +
    51.9 +typedef struct {
   51.10 +    unsigned long r15;
   51.11 +    unsigned long r14;
   51.12 +    unsigned long r13;
   51.13 +    unsigned long r12;
   51.14 +    unsigned long rbp;
   51.15 +    unsigned long rbx;
   51.16 +    unsigned long r11;
   51.17 +    unsigned long r10;
   51.18 +    unsigned long r9;
   51.19 +    unsigned long r8;
   51.20 +    unsigned long rax;
   51.21 +    unsigned long rcx;
   51.22 +    unsigned long rdx;
   51.23 +    unsigned long rsi;
   51.24 +    unsigned long rdi;
   51.25 +    unsigned long orig_rax;
   51.26 +    unsigned long rip;
   51.27 +    unsigned long cs;
   51.28 +    unsigned long eflags;
   51.29 +    unsigned long rsp;
   51.30 +    unsigned long ss;
   51.31 +    unsigned long thread_fs;
   51.32 +    unsigned long thread_gs;
   51.33 +    unsigned long ds;
   51.34 +    unsigned long es;
   51.35 +    unsigned long fs;
   51.36 +    unsigned long gs;
   51.37 +} ELF_Gregset;
   51.38 +
   51.39 +extern inline void elf_core_save_regs(ELF_Gregset *core_regs, 
   51.40 +                                      crash_xen_core_t *xen_core_regs)
   51.41 +{
   51.42 +    unsigned long tmp;
   51.43 +
   51.44 +    asm volatile("movq %%r15,%0" : "=m"(core_regs->r15));
   51.45 +    asm volatile("movq %%r14,%0" : "=m"(core_regs->r14));
   51.46 +    asm volatile("movq %%r13,%0" : "=m"(core_regs->r13));
   51.47 +    asm volatile("movq %%r12,%0" : "=m"(core_regs->r12));
   51.48 +    asm volatile("movq %%rbp,%0" : "=m"(core_regs->rbp));
   51.49 +    asm volatile("movq %%rbx,%0" : "=m"(core_regs->rbx));
   51.50 +    asm volatile("movq %%r11,%0" : "=m"(core_regs->r11));
   51.51 +    asm volatile("movq %%r10,%0" : "=m"(core_regs->r10));
   51.52 +    asm volatile("movq %%r9,%0" : "=m"(core_regs->r9));
   51.53 +    asm volatile("movq %%r8,%0" : "=m"(core_regs->r8));
   51.54 +    asm volatile("movq %%rax,%0" : "=m"(core_regs->rax));
   51.55 +    asm volatile("movq %%rcx,%0" : "=m"(core_regs->rcx));
   51.56 +    asm volatile("movq %%rdx,%0" : "=m"(core_regs->rdx));
   51.57 +    asm volatile("movq %%rsi,%0" : "=m"(core_regs->rsi));
   51.58 +    asm volatile("movq %%rdi,%0" : "=m"(core_regs->rdi));
   51.59 +    /* orig_rax not filled in for now */
   51.60 +    core_regs->rip = (unsigned long)current_text_addr();
   51.61 +    asm volatile("movl %%cs, %%eax;" :"=a"(core_regs->cs));
   51.62 +    asm volatile("pushfq; popq %0" :"=m"(core_regs->eflags));
   51.63 +    asm volatile("movq %%rsp,%0" : "=m"(core_regs->rsp));
   51.64 +    asm volatile("movl %%ss, %%eax;" :"=a"(core_regs->ss));
   51.65 +    /* thread_fs not filled in for now */
   51.66 +    /* thread_gs not filled in for now */
   51.67 +    asm volatile("movl %%ds, %%eax;" :"=a"(core_regs->ds));
   51.68 +    asm volatile("movl %%es, %%eax;" :"=a"(core_regs->es));
   51.69 +    asm volatile("movl %%fs, %%eax;" :"=a"(core_regs->fs));
   51.70 +    asm volatile("movl %%gs, %%eax;" :"=a"(core_regs->gs));
   51.71 +
   51.72 +    asm volatile("mov %%cr0, %0" : "=r" (tmp) : );
   51.73 +    xen_core_regs->cr0 = tmp;
   51.74 +
   51.75 +    asm volatile("mov %%cr2, %0" : "=r" (tmp) : );
   51.76 +    xen_core_regs->cr2 = tmp;
   51.77 +
   51.78 +    asm volatile("mov %%cr3, %0" : "=r" (tmp) : );
   51.79 +    xen_core_regs->cr3 = tmp;
   51.80 +
   51.81 +    asm volatile("mov %%cr4, %0" : "=r" (tmp) : );
   51.82 +    xen_core_regs->cr4 = tmp;
   51.83 +}
   51.84 +
   51.85 +#endif /* __X86_64_ELF_H__ */
   51.86 +
   51.87 +/*
   51.88 + * Local variables:
   51.89 + * mode: C
   51.90 + * c-set-style: "BSD"
   51.91 + * c-basic-offset: 4
   51.92 + * tab-width: 4
   51.93 + * indent-tabs-mode: nil
   51.94 + * End:
   51.95 + */
    52.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    52.2 +++ b/xen/include/asm-x86/x86_64/kexec.h	Thu Nov 30 13:05:27 2006 +0000
    52.3 @@ -0,0 +1,39 @@
    52.4 +/******************************************************************************
    52.5 + * kexec.h
    52.6 + * 
    52.7 + * Based heavily on machine_kexec.c and kexec.h from Linux 2.6.19-rc1
    52.8 + *
    52.9 + */
   52.10 +
   52.11 +#ifndef __X86_64_KEXEC_H__
   52.12 +#define __X86_64_KEXEC_H__
   52.13 +  
   52.14 +#include <xen/types.h>
   52.15 +#include <xen/kexec.h>
   52.16 +#include <asm/fixmap.h>
   52.17 +
   52.18 +typedef void (*relocate_new_kernel_t)(
   52.19 +                unsigned long indirection_page,
   52.20 +                unsigned long page_list,
   52.21 +                unsigned long start_address);
   52.22 +
   52.23 +static inline void machine_kexec(xen_kexec_image_t *image)
   52.24 +{
   52.25 +    relocate_new_kernel_t rnk;
   52.26 +
   52.27 +    rnk = (relocate_new_kernel_t) image->page_list[1];
   52.28 +    (*rnk)(image->indirection_page, (unsigned long)image->page_list, 
   52.29 +           image->start_address);
   52.30 +}
   52.31 +
   52.32 +#endif /* __X86_64_KEXEC_H__ */
   52.33 +
   52.34 +/*
   52.35 + * Local variables:
   52.36 + * mode: C
   52.37 + * c-set-style: "BSD"
   52.38 + * c-basic-offset: 4
   52.39 + * tab-width: 4
   52.40 + * indent-tabs-mode: nil
   52.41 + * End:
   52.42 + */
    53.1 --- a/xen/include/public/elfnote.h	Thu Nov 30 10:57:28 2006 +0000
    53.2 +++ b/xen/include/public/elfnote.h	Thu Nov 30 13:05:27 2006 +0000
    53.3 @@ -147,6 +147,25 @@
    53.4   */
    53.5  #define XEN_ELFNOTE_HV_START_LOW  12
    53.6  
    53.7 +/*
    53.8 + * System information exported through crash notes.
    53.9 + *
   53.10 + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO 
   53.11 + * note in case of a system crash. This note will contain various
   53.12 + * information about the system, see xen/include/xen/elfcore.h.
   53.13 + */
   53.14 +#define XEN_ELFNOTE_CRASH_INFO 0x1000001
   53.15 +
   53.16 +/*
   53.17 + * System registers exported through crash notes.
   53.18 + *
   53.19 + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS 
   53.20 + * note per cpu in case of a system crash. This note is architecture
   53.21 + * specific and will contain registers not saved in the "CORE" note.
   53.22 + * See xen/include/xen/elfcore.h for more information.
   53.23 + */
   53.24 +#define XEN_ELFNOTE_CRASH_REGS 0x1000002
   53.25 +
   53.26  #endif /* __XEN_PUBLIC_ELFNOTE_H__ */
   53.27  
   53.28  /*
    54.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    54.2 +++ b/xen/include/public/kexec.h	Thu Nov 30 13:05:27 2006 +0000
    54.3 @@ -0,0 +1,138 @@
    54.4 +/******************************************************************************
    54.5 + * kexec.h - Public portion
    54.6 + * 
    54.7 + * Xen port written by:
    54.8 + * - Simon 'Horms' Horman <horms@verge.net.au>
    54.9 + * - Magnus Damm <magnus@valinux.co.jp>
   54.10 + */
   54.11 +
   54.12 +#ifndef _XEN_PUBLIC_KEXEC_H
   54.13 +#define _XEN_PUBLIC_KEXEC_H
   54.14 +
   54.15 +
   54.16 +/* This file describes the Kexec / Kdump hypercall interface for Xen.
   54.17 + *
   54.18 + * Kexec under vanilla Linux allows a user to reboot the physical machine 
   54.19 + * into a new user-specified kernel. The Xen port extends this idea
   54.20 + * to allow rebooting of the machine from dom0. When kexec for dom0
   54.21 + * is used to reboot,  both the hypervisor and the domains get replaced
   54.22 + * with some other kernel. It is possible to kexec between vanilla
   54.23 + * Linux and Xen and back again. Xen to Xen works well too.
   54.24 + *
   54.25 + * The hypercall interface for kexec can be divided into three main
   54.26 + * types of hypercall operations:
   54.27 + *
   54.28 + * 1) Range information:
   54.29 + *    This is used by the dom0 kernel to ask the hypervisor about various 
   54.30 + *    address information. This information is needed to allow kexec-tools 
   54.31 + *    to fill in the ELF headers for /proc/vmcore properly.
   54.32 + *
   54.33 + * 2) Load and unload of images:
   54.34 + *    There are no big surprises here, the kexec binary from kexec-tools
   54.35 + *    runs in userspace in dom0. The tool loads/unloads data into the
   54.36 + *    dom0 kernel such as new kernel, initramfs and hypervisor. When
   54.37 + *    loaded the dom0 kernel performs a load hypercall operation, and
   54.38 + *    before releasing all page references the dom0 kernel calls unload.
   54.39 + *
   54.40 + * 3) Kexec operation:
   54.41 + *    This is used to start a previously loaded kernel.
   54.42 + */
   54.43 +
   54.44 +#include "xen.h"
   54.45 +
   54.46 +#if defined(__i386__) || defined(__x86_64__)
   54.47 +#define KEXEC_XEN_NO_PAGES 17
   54.48 +#endif
   54.49 +
   54.50 +/*
   54.51 + * Prototype for this hypercall is:
   54.52 + *  int kexec_op(int cmd, void *args)
   54.53 + * @cmd  == KEXEC_CMD_... 
   54.54 + *          KEXEC operation to perform
   54.55 + * @args == Operation-specific extra arguments (NULL if none).
   54.56 + */
   54.57 +
   54.58 +/*
   54.59 + * Kexec supports two types of operation:
   54.60 + * - kexec into a regular kernel, very similar to a standard reboot
   54.61 + *   - KEXEC_TYPE_DEFAULT is used to specify this type
   54.62 + * - kexec into a special "crash kernel", aka kexec-on-panic
   54.63 + *   - KEXEC_TYPE_CRASH is used to specify this type
   54.64 + *   - parts of our system may be broken at kexec-on-panic time
   54.65 + *     - the code should be kept as simple and self-contained as possible
   54.66 + */
   54.67 +
   54.68 +#define KEXEC_TYPE_DEFAULT 0
   54.69 +#define KEXEC_TYPE_CRASH   1
   54.70 +
   54.71 +
   54.72 +/* The kexec implementation for Xen allows the user to load two
   54.73 + * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH.
   54.74 + * All data needed for a kexec reboot is kept in one xen_kexec_image_t
   54.75 + * per "instance". The data mainly consists of machine address lists to pages
   54.76 + * together with destination addresses. The data in xen_kexec_image_t
   54.77 + * is passed to the "code page" which is one page of code that performs
   54.78 + * the final relocations before jumping to the new kernel.
   54.79 + */
   54.80 + 
   54.81 +typedef struct xen_kexec_image {
   54.82 +#if defined(__i386__) || defined(__x86_64__)
   54.83 +    unsigned long page_list[KEXEC_XEN_NO_PAGES];
   54.84 +#endif
   54.85 +    unsigned long indirection_page;
   54.86 +    unsigned long start_address;
   54.87 +} xen_kexec_image_t;
   54.88 +
   54.89 +/*
   54.90 + * Perform kexec having previously loaded a kexec or kdump kernel
   54.91 + * as appropriate.
   54.92 + * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
   54.93 + */
   54.94 +#define KEXEC_CMD_kexec                 0
   54.95 +typedef struct xen_kexec_exec {
   54.96 +    int type;
   54.97 +} xen_kexec_exec_t;
   54.98 +
   54.99 +/*
  54.100 + * Load/Unload kernel image for kexec or kdump.
  54.101 + * type  == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
  54.102 + * image == relocation information for kexec (ignored for unload) [in]
  54.103 + */
  54.104 +#define KEXEC_CMD_kexec_load            1
  54.105 +#define KEXEC_CMD_kexec_unload          2
  54.106 +typedef struct xen_kexec_load {
  54.107 +    int type;
  54.108 +    xen_kexec_image_t image;
  54.109 +} xen_kexec_load_t;
  54.110 +
  54.111 +#define KEXEC_RANGE_MA_CRASH 0   /* machine address and size of crash area */
  54.112 +#define KEXEC_RANGE_MA_XEN   1   /* machine address and size of Xen itself */
  54.113 +#define KEXEC_RANGE_VA_XEN   2   /* virtual address and size of Xen itself */
  54.114 +#define KEXEC_RANGE_MA_CPU   3   /* machine address and size of a CPU note */
  54.115 +
  54.116 +/*
  54.117 + * Find the address and size of certain memory areas
  54.118 + * range == KEXEC_RANGE_... [in]
  54.119 + * nr    == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in]
  54.120 + * size  == number of bytes reserved in window [out]
  54.121 + * start == address of the first byte in the window [out]
  54.122 + */
  54.123 +#define KEXEC_CMD_kexec_get_range       3
  54.124 +typedef struct xen_kexec_range {
  54.125 +    int range;
  54.126 +    int nr;
  54.127 +    unsigned long size;
  54.128 +    unsigned long start;
  54.129 +} xen_kexec_range_t;
  54.130 +
  54.131 +#endif /* _XEN_PUBLIC_KEXEC_H */
  54.132 +
  54.133 +/*
  54.134 + * Local variables:
  54.135 + * mode: C
  54.136 + * c-set-style: "BSD"
  54.137 + * c-basic-offset: 4
  54.138 + * tab-width: 4
  54.139 + * indent-tabs-mode: nil
  54.140 + * End:
  54.141 + */
    55.1 --- a/xen/include/xen/elf.h	Thu Nov 30 10:57:28 2006 +0000
    55.2 +++ b/xen/include/xen/elf.h	Thu Nov 30 13:05:27 2006 +0000
    55.3 @@ -452,18 +452,12 @@ unsigned int elf_hash(const unsigned cha
    55.4  /*
    55.5   * Note Definitions
    55.6   */
    55.7 -typedef struct {
    55.8 -	Elf32_Word namesz;
    55.9 -	Elf32_Word descsz;
   55.10 -	Elf32_Word type;
   55.11 -} Elf32_Note;
   55.12  
   55.13  typedef struct {
   55.14 -	Elf64_Half namesz;
   55.15 -	Elf64_Half descsz;
   55.16 -	Elf64_Half type;
   55.17 -} Elf64_Note;
   55.18 -
   55.19 +	u32 namesz;
   55.20 +	u32 descsz;
   55.21 +	u32 type;
   55.22 +} Elf_Note; /* same format for both 32-bit and 64-bit ELF */
   55.23  
   55.24  #if defined(ELFSIZE)
   55.25  #define CONCAT(x,y)	__CONCAT(x,y)
   55.26 @@ -486,7 +480,6 @@ typedef struct {
   55.27  #define Elf_Addr	Elf32_Addr
   55.28  #define Elf_Off		Elf32_Off
   55.29  #define Elf_Nhdr	Elf32_Nhdr
   55.30 -#define Elf_Note	Elf32_Note
   55.31  
   55.32  #define ELF_R_SYM	ELF32_R_SYM
   55.33  #define ELF_R_TYPE	ELF32_R_TYPE
   55.34 @@ -511,7 +504,6 @@ typedef struct {
   55.35  #define Elf_Addr	Elf64_Addr
   55.36  #define Elf_Off		Elf64_Off
   55.37  #define Elf_Nhdr	Elf64_Nhdr
   55.38 -#define Elf_Note	Elf64_Note
   55.39  
   55.40  #define ELF_R_SYM	ELF64_R_SYM
   55.41  #define ELF_R_TYPE	ELF64_R_TYPE
    56.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    56.2 +++ b/xen/include/xen/elfcore.h	Thu Nov 30 13:05:27 2006 +0000
    56.3 @@ -0,0 +1,140 @@
    56.4 +/******************************************************************************
    56.5 + * elfcore.h
    56.6 + *
    56.7 + * Based heavily on include/linux/elfcore.h from Linux 2.6.16
    56.8 + * Naming scheme based on include/xen/elf.h (not include/linux/elfcore.h)
    56.9 + *
   56.10 + */
   56.11 +
   56.12 +#ifndef __ELFCOREC_H__
   56.13 +#define __ELFCOREC_H__
   56.14 +
   56.15 +#include <xen/types.h>
   56.16 +#include <xen/elf.h>
   56.17 +#include <asm/elf.h>
   56.18 +#include <public/xen.h>
   56.19 +
   56.20 +#define NT_PRSTATUS     1
   56.21 +
   56.22 +typedef struct
   56.23 +{
   56.24 +    int signo;                       /* signal number */
   56.25 +    int code;                        /* extra code */
   56.26 +    int errno;                       /* errno */
   56.27 +} ELF_Signifo;
   56.28 +
   56.29 +/* These seem to be the same length on all architectures on Linux */
   56.30 +typedef int ELF_Pid;
   56.31 +typedef struct {
   56.32 +	long tv_sec;
   56.33 +	long tv_usec;
   56.34 +} ELF_Timeval;
   56.35 +
   56.36 +/*
   56.37 + * Definitions to generate Intel SVR4-like core files.
   56.38 + * These mostly have the same names as the SVR4 types with "elf_"
   56.39 + * tacked on the front to prevent clashes with linux definitions,
   56.40 + * and the typedef forms have been avoided.  This is mostly like
   56.41 + * the SVR4 structure, but more Linuxy, with things that Linux does
   56.42 + * not support and which gdb doesn't really use excluded.
   56.43 + */
   56.44 +typedef struct
   56.45 +{
   56.46 +    ELF_Signifo pr_info;         /* Info associated with signal */
   56.47 +    short pr_cursig;             /* Current signal */
   56.48 +    unsigned long pr_sigpend;    /* Set of pending signals */
   56.49 +    unsigned long pr_sighold;    /* Set of held signals */
   56.50 +    ELF_Pid pr_pid;
   56.51 +    ELF_Pid pr_ppid;
   56.52 +    ELF_Pid pr_pgrp;
   56.53 +    ELF_Pid pr_sid;
   56.54 +    ELF_Timeval pr_utime;        /* User time */
   56.55 +    ELF_Timeval pr_stime;        /* System time */
   56.56 +    ELF_Timeval pr_cutime;       /* Cumulative user time */
   56.57 +    ELF_Timeval pr_cstime;       /* Cumulative system time */
   56.58 +    ELF_Gregset pr_reg;          /* GP registers - from asm header file */
   56.59 +    int pr_fpvalid;              /* True if math co-processor being used.  */
   56.60 +} ELF_Prstatus;
   56.61 +
   56.62 +/*
   56.63 + * The following data structures provide 64-bit ELF notes. In theory it should 
   56.64 + * be possible to support both 64-bit and 32-bit ELF files, but to keep it 
   56.65 + * simple we only do 64-bit.
   56.66 + *
   56.67 + * Please note that the current code aligns the 64-bit notes in the same
   56.68 + * way as Linux does. We are not following the 64-bit ELF spec, no one does.
   56.69 + *
   56.70 + * We are avoiding two problems by restricting us to 64-bit notes only:
   56.71 + * - Alignment of notes change with the word size. Ick.
   56.72 + * - We would need to tell kexec-tools which format we are using in the
   56.73 + *   hypervisor to make sure the right ELF format is generated.
   56.74 + *   That requires infrastructure. Let's not.
   56.75 + */
   56.76 +
   56.77 +#define ALIGN(x, n) (((x) + ((1 << (n)) - 1)) / (1 << (n)))
   56.78 +#define PAD32(x) u32 pad_data[ALIGN(x, 2)]
   56.79 +
   56.80 +#define TYPEDEF_NOTE(type, strlen, desctype)    \
   56.81 +    typedef struct {                            \
   56.82 +        union {                                 \
   56.83 +            struct {                            \
   56.84 +                Elf_Note note;                  \
   56.85 +                unsigned char name[strlen];     \
   56.86 +            } note;                             \
   56.87 +            PAD32(sizeof(Elf_Note) + strlen);   \
   56.88 +        } note;                                 \
   56.89 +        union {                                 \
   56.90 +            desctype desc;                      \
   56.91 +            PAD32(sizeof(desctype));            \
   56.92 +        } desc;                                 \
   56.93 +    } __attribute__ ((packed)) type
   56.94 +
   56.95 +#define CORE_STR                "CORE"
   56.96 +#define CORE_STR_LEN            5 /* including terminating zero */
   56.97 +
   56.98 +TYPEDEF_NOTE(crash_note_core_t, CORE_STR_LEN, ELF_Prstatus);
   56.99 +
  56.100 +#define XEN_STR                 "Xen"
  56.101 +#define XEN_STR_LEN             4 /* including terminating zero */
  56.102 +
  56.103 +TYPEDEF_NOTE(crash_note_xen_core_t, XEN_STR_LEN, crash_xen_core_t);
  56.104 +
  56.105 +typedef struct {
  56.106 +    unsigned long xen_major_version;
  56.107 +    unsigned long xen_minor_version;
  56.108 +    unsigned long xen_extra_version;
  56.109 +    unsigned long xen_changeset;
  56.110 +    unsigned long xen_compiler;
  56.111 +    unsigned long xen_compile_date;
  56.112 +    unsigned long xen_compile_time;
  56.113 +    unsigned long tainted;
  56.114 +#ifdef CONFIG_X86
  56.115 +    unsigned long dom0_pfn_to_mfn_frame_list_list;
  56.116 +#endif
  56.117 +} crash_xen_info_t;
  56.118 +
  56.119 +TYPEDEF_NOTE(crash_note_xen_info_t, XEN_STR_LEN, crash_xen_info_t);
  56.120 +
  56.121 +typedef struct {
  56.122 +    crash_note_core_t core;
  56.123 +    crash_note_xen_core_t xen_regs;
  56.124 +    crash_note_xen_info_t xen_info;
  56.125 +} __attribute__ ((packed)) crash_note_t;
  56.126 +
  56.127 +#define setup_crash_note(np, member, str, str_len, id) do { \
  56.128 +  (np)->member.note.note.note.namesz = (str_len); \
  56.129 +  (np)->member.note.note.note.descsz = sizeof((np)->member.desc.desc); \
  56.130 +  (np)->member.note.note.note.type = (id); \
  56.131 +  memcpy((np)->member.note.note.name, (str), (str_len)); } while (0)
  56.132 +
  56.133 +#endif /* __ELFCOREC_H__ */
  56.134 +
  56.135 +/*
  56.136 + * Local variables:
  56.137 + * mode: C
  56.138 + * c-set-style: "BSD"
  56.139 + * c-basic-offset: 4
  56.140 + * tab-width: 4
  56.141 + * indent-tabs-mode: nil
  56.142 + * End:
  56.143 + */
    57.1 --- a/xen/include/xen/hypercall.h	Thu Nov 30 10:57:28 2006 +0000
    57.2 +++ b/xen/include/xen/hypercall.h	Thu Nov 30 13:05:27 2006 +0000
    57.3 @@ -102,4 +102,10 @@ do_hvm_op(
    57.4      unsigned long op,
    57.5      XEN_GUEST_HANDLE(void) arg);
    57.6  
    57.7 +extern long
    57.8 +do_kexec_op(
    57.9 +    unsigned long op,
   57.10 +    int arg1,
   57.11 +    XEN_GUEST_HANDLE(void) arg);
   57.12 +
   57.13  #endif /* __XEN_HYPERCALL_H__ */
    58.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    58.2 +++ b/xen/include/xen/kexec.h	Thu Nov 30 13:05:27 2006 +0000
    58.3 @@ -0,0 +1,43 @@
    58.4 +#ifndef __XEN_KEXEC_H__
    58.5 +#define __XEN_KEXEC_H__
    58.6 +
    58.7 +#include <public/kexec.h>
    58.8 +#include <asm/percpu.h>
    58.9 +#include <xen/elfcore.h>
   58.10 +
   58.11 +extern int crashing_cpu;
   58.12 +
   58.13 +typedef struct xen_kexec_reserve {
   58.14 +    unsigned long size;
   58.15 +    unsigned long start;
   58.16 +} xen_kexec_reserve_t;
   58.17 +
   58.18 +/* We have space for 4 images to support atomic update
   58.19 + * of images. This is important for CRASH images since
   58.20 + * a panic can happen at any time...
   58.21 + */
   58.22 +
   58.23 +#define KEXEC_IMAGE_DEFAULT_BASE 0
   58.24 +#define KEXEC_IMAGE_CRASH_BASE   2
   58.25 +#define KEXEC_IMAGE_NR           4
   58.26 +
   58.27 +int machine_kexec_load(int type, int slot, xen_kexec_image_t *image);
   58.28 +void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image);
   58.29 +void machine_kexec_reserved(xen_kexec_reserve_t *reservation);
   58.30 +void machine_shutdown(xen_kexec_image_t *image);
   58.31 +void machine_crash_kexec(void);
   58.32 +void machine_crash_save_cpu(void);
   58.33 +crash_xen_info_t *machine_crash_save_info(void);
   58.34 +void machine_crash_shutdown(void);
   58.35 +
   58.36 +#endif /* __XEN_KEXEC_H__ */
   58.37 +
   58.38 +/*
   58.39 + * Local variables:
   58.40 + * mode: C
   58.41 + * c-set-style: "BSD"
   58.42 + * c-basic-offset: 4
   58.43 + * tab-width: 4
   58.44 + * indent-tabs-mode: nil
   58.45 + * End:
   58.46 + */
    59.1 --- a/xen/include/xen/mm.h	Thu Nov 30 10:57:28 2006 +0000
    59.2 +++ b/xen/include/xen/mm.h	Thu Nov 30 13:05:27 2006 +0000
    59.3 @@ -40,6 +40,7 @@ struct page_info;
    59.4  paddr_t init_boot_allocator(paddr_t bitmap_start);
    59.5  void init_boot_pages(paddr_t ps, paddr_t pe);
    59.6  unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align);
    59.7 +unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at);
    59.8  void end_boot_allocator(void);
    59.9  
   59.10  /* Generic allocator. These functions are *not* interrupt-safe. */