ia64/xen-unstable

changeset 10270:0a226de3fc37

[IA64] Update efi.c and rework xenheap location

This is a port of a newer upstream efi.c to xen/ia64. For the most
part, this patch simply incorporates the following upstream linux-ia64
patch into the tree:

http://www.kernel.org/hg/linux-2.6/?cs=fb781f6d3e81

To support this new code, xensetup had to be modified to relocate the
dom0 kernel and initrd images without using efi_memmap_walk(), since
that function can no longer be called until after reserve_memory(). The
dom0 kernel and initrd images are now moved only if necessary, and the
xen MDT entry is expanded to cover the xenheap area and any relocated
dom0 bits.
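
To illustrate the simplified walker interface (this example is not part
of the patch; the callback shape mirrors xen_count_pages() in
xensetup.c, and efi_memmap_walk() now just iterates the prebuilt
kern_memmap):

    /* Called once per free WB memory range (virtual addresses);
       returning a negative value stops the walk early. */
    static int
    count_pages (u64 start, u64 end, void *arg)
    {
        unsigned long *count = arg;

        *count += (end - start) >> PAGE_SHIFT;
        return 0;
    }

    /* e.g., from start_kernel(): */
    unsigned long nr_pages = 0;
    efi_memmap_walk(count_pages, &nr_pages);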

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
author awilliam@xenbuild.aw
date Tue Jun 13 08:45:22 2006 -0600 (2006-06-13)
parents aa2298739112
children 0d149b326235
files xen/arch/ia64/linux-xen/efi.c xen/arch/ia64/linux-xen/setup.c xen/arch/ia64/xen/xensetup.c xen/include/asm-ia64/linux-xen/asm/meminit.h
line diff
     1.1 --- a/xen/arch/ia64/linux-xen/efi.c	Fri Jun 09 10:40:31 2006 -0600
     1.2 +++ b/xen/arch/ia64/linux-xen/efi.c	Tue Jun 13 08:45:22 2006 -0600
     1.3 @@ -246,57 +246,30 @@ is_available_memory (efi_memory_desc_t *
     1.4  	return 0;
     1.5  }
     1.6  
     1.7 -/*
     1.8 - * Trim descriptor MD so its starts at address START_ADDR.  If the descriptor covers
     1.9 - * memory that is normally available to the kernel, issue a warning that some memory
    1.10 - * is being ignored.
    1.11 - */
    1.12 -static void
    1.13 -trim_bottom (efi_memory_desc_t *md, u64 start_addr)
    1.14 -{
    1.15 -	u64 num_skipped_pages;
    1.16 -
    1.17 -	if (md->phys_addr >= start_addr || !md->num_pages)
    1.18 -		return;
    1.19 +typedef struct kern_memdesc {
    1.20 +	u64 attribute;
    1.21 +	u64 start;
    1.22 +	u64 num_pages;
    1.23 +} kern_memdesc_t;
    1.24  
    1.25 -	num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
    1.26 -	if (num_skipped_pages > md->num_pages)
    1.27 -		num_skipped_pages = md->num_pages;
    1.28 -
    1.29 -	if (is_available_memory(md))
    1.30 -		printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
    1.31 -		       "at 0x%lx\n", __FUNCTION__,
    1.32 -		       (num_skipped_pages << EFI_PAGE_SHIFT) >> 10,
    1.33 -		       md->phys_addr, start_addr - IA64_GRANULE_SIZE);
    1.34 -	/*
    1.35 -	 * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory
    1.36 -	 * descriptor list to become unsorted.  In such a case, md->num_pages will be
    1.37 -	 * zero, so the Right Thing will happen.
    1.38 -	 */
    1.39 -	md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT;
    1.40 -	md->num_pages -= num_skipped_pages;
    1.41 -}
    1.42 +static kern_memdesc_t *kern_memmap;
    1.43  
    1.44  static void
    1.45 -trim_top (efi_memory_desc_t *md, u64 end_addr)
    1.46 +walk (efi_freemem_callback_t callback, void *arg, u64 attr)
    1.47  {
    1.48 -	u64 num_dropped_pages, md_end_addr;
    1.49 -
    1.50 -	md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
    1.51 -
    1.52 -	if (md_end_addr <= end_addr || !md->num_pages)
    1.53 -		return;
    1.54 +	kern_memdesc_t *k;
    1.55 +	u64 start, end, voff;
    1.56  
    1.57 -	num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT;
    1.58 -	if (num_dropped_pages > md->num_pages)
    1.59 -		num_dropped_pages = md->num_pages;
    1.60 -
    1.61 -	if (is_available_memory(md))
    1.62 -		printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
    1.63 -		       "at 0x%lx\n", __FUNCTION__,
    1.64 -		       (num_dropped_pages << EFI_PAGE_SHIFT) >> 10,
    1.65 -		       md->phys_addr, end_addr);
    1.66 -	md->num_pages -= num_dropped_pages;
    1.67 +	voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET;
    1.68 +	for (k = kern_memmap; k->start != ~0UL; k++) {
    1.69 +		if (k->attribute != attr)
    1.70 +			continue;
    1.71 +		start = PAGE_ALIGN(k->start);
    1.72 +		end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK;
    1.73 +		if (start < end)
    1.74 +			if ((*callback)(start + voff, end + voff, arg) < 0)
    1.75 +				return;
    1.76 +	}
    1.77  }
    1.78  
    1.79  /*
    1.80 @@ -306,153 +279,17 @@ trim_top (efi_memory_desc_t *md, u64 end
    1.81  void
    1.82  efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
    1.83  {
    1.84 -	int prev_valid = 0;
    1.85 -	struct range {
    1.86 -		u64 start;
    1.87 -		u64 end;
    1.88 -	} prev, curr;
    1.89 -	void *efi_map_start, *efi_map_end, *p, *q;
    1.90 -	efi_memory_desc_t *md, *check_md;
    1.91 -	u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0;
    1.92 -	unsigned long total_mem = 0;
    1.93 -
    1.94 -	efi_map_start = __va(ia64_boot_param->efi_memmap);
    1.95 -	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
    1.96 -	efi_desc_size = ia64_boot_param->efi_memdesc_size;
    1.97 -
    1.98 -	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
    1.99 -		md = p;
   1.100 -
   1.101 -		/* skip over non-WB memory descriptors; that's all we're interested in... */
   1.102 -		if (!(md->attribute & EFI_MEMORY_WB))
   1.103 -			continue;
   1.104 -
   1.105 -#ifdef XEN
   1.106 -// this works around a problem in the ski bootloader
   1.107 -{
   1.108 -		extern long running_on_sim;
   1.109 -		if (running_on_sim && md->type != EFI_CONVENTIONAL_MEMORY)
   1.110 -			continue;
   1.111 -}
   1.112 -#endif
   1.113 -		/*
   1.114 -		 * granule_addr is the base of md's first granule.
   1.115 -		 * [granule_addr - first_non_wb_addr) is guaranteed to
   1.116 -		 * be contiguous WB memory.
   1.117 -		 */
   1.118 -		granule_addr = GRANULEROUNDDOWN(md->phys_addr);
   1.119 -		first_non_wb_addr = max(first_non_wb_addr, granule_addr);
   1.120 -
   1.121 -		if (first_non_wb_addr < md->phys_addr) {
   1.122 -			trim_bottom(md, granule_addr + IA64_GRANULE_SIZE);
   1.123 -			granule_addr = GRANULEROUNDDOWN(md->phys_addr);
   1.124 -			first_non_wb_addr = max(first_non_wb_addr, granule_addr);
   1.125 -		}
   1.126 -
   1.127 -		for (q = p; q < efi_map_end; q += efi_desc_size) {
   1.128 -			check_md = q;
   1.129 -
   1.130 -			if ((check_md->attribute & EFI_MEMORY_WB) &&
   1.131 -			    (check_md->phys_addr == first_non_wb_addr))
   1.132 -				first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT;
   1.133 -			else
   1.134 -				break;		/* non-WB or hole */
   1.135 -		}
   1.136 -
   1.137 -		last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr);
   1.138 -		if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))
   1.139 -			trim_top(md, last_granule_addr);
   1.140 -
   1.141 -		if (is_available_memory(md)) {
   1.142 -			if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) {
   1.143 -				if (md->phys_addr >= max_addr)
   1.144 -					continue;
   1.145 -				md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
   1.146 -				first_non_wb_addr = max_addr;
   1.147 -			}
   1.148 -
   1.149 -			if (total_mem >= mem_limit)
   1.150 -				continue;
   1.151 -
   1.152 -			if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) {
   1.153 -				unsigned long limit_addr = md->phys_addr;
   1.154 -
   1.155 -				limit_addr += mem_limit - total_mem;
   1.156 -				limit_addr = GRANULEROUNDDOWN(limit_addr);
   1.157 -
   1.158 -				if (md->phys_addr > limit_addr)
   1.159 -					continue;
   1.160 -
   1.161 -				md->num_pages = (limit_addr - md->phys_addr) >>
   1.162 -				                EFI_PAGE_SHIFT;
   1.163 -				first_non_wb_addr = max_addr = md->phys_addr +
   1.164 -				              (md->num_pages << EFI_PAGE_SHIFT);
   1.165 -			}
   1.166 -			total_mem += (md->num_pages << EFI_PAGE_SHIFT);
   1.167 -
   1.168 -			if (md->num_pages == 0)
   1.169 -				continue;
   1.170 -
   1.171 -			curr.start = PAGE_OFFSET + md->phys_addr;
   1.172 -			curr.end   = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
   1.173 -
   1.174 -			if (!prev_valid) {
   1.175 -				prev = curr;
   1.176 -				prev_valid = 1;
   1.177 -			} else {
   1.178 -				if (curr.start < prev.start)
   1.179 -					printk(KERN_ERR "Oops: EFI memory table not ordered!\n");
   1.180 -
   1.181 -				if (prev.end == curr.start) {
   1.182 -					/* merge two consecutive memory ranges */
   1.183 -					prev.end = curr.end;
   1.184 -				} else {
   1.185 -					start = PAGE_ALIGN(prev.start);
   1.186 -					end = prev.end & PAGE_MASK;
   1.187 -					if ((end > start) && (*callback)(start, end, arg) < 0)
   1.188 -						return;
   1.189 -					prev = curr;
   1.190 -				}
   1.191 -			}
   1.192 -		}
   1.193 -	}
   1.194 -	if (prev_valid) {
   1.195 -		start = PAGE_ALIGN(prev.start);
   1.196 -		end = prev.end & PAGE_MASK;
   1.197 -		if (end > start)
   1.198 -			(*callback)(start, end, arg);
   1.199 -	}
   1.200 +	walk(callback, arg, EFI_MEMORY_WB);
   1.201  }
   1.202  
   1.203  /*
   1.204 - * Walk the EFI memory map to pull out leftover pages in the lower
   1.205 - * memory regions which do not end up in the regular memory map and
   1.206 - * stick them into the uncached allocator
   1.207 - *
   1.208 - * The regular walk function is significantly more complex than the
   1.209 - * uncached walk which means it really doesn't make sense to try and
   1.210 - * marge the two.
   1.211 + * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor
   1.212 + * that has memory available for the uncached allocator.
   1.213   */
   1.214 -void __init
   1.215 -efi_memmap_walk_uc (efi_freemem_callback_t callback)
   1.216 +void
   1.217 +efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
   1.218  {
   1.219 -	void *efi_map_start, *efi_map_end, *p;
   1.220 -	efi_memory_desc_t *md;
   1.221 -	u64 efi_desc_size, start, end;
   1.222 -
   1.223 -	efi_map_start = __va(ia64_boot_param->efi_memmap);
   1.224 -	efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
   1.225 -	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   1.226 -
   1.227 -	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
   1.228 -		md = p;
   1.229 -		if (md->attribute == EFI_MEMORY_UC) {
   1.230 -			start = PAGE_ALIGN(md->phys_addr);
   1.231 -			end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK);
   1.232 -			if ((*callback)(start, end, NULL) < 0)
   1.233 -				return;
   1.234 -		}
   1.235 -	}
   1.236 +	walk(callback, arg, EFI_MEMORY_UC);
   1.237  }
   1.238  
   1.239  #ifdef XEN
   1.240 @@ -799,30 +636,6 @@ efi_get_iobase (void)
   1.241  	return 0;
   1.242  }
   1.243  
   1.244 -#ifdef XEN
   1.245 -// variation of efi_get_iobase which returns entire memory descriptor
   1.246 -efi_memory_desc_t *
   1.247 -efi_get_io_md (void)
   1.248 -{
   1.249 -	void *efi_map_start, *efi_map_end, *p;
   1.250 -	efi_memory_desc_t *md;
   1.251 -	u64 efi_desc_size;
   1.252 -
   1.253 -	efi_map_start = __va(ia64_boot_param->efi_memmap);
   1.254 -	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   1.255 -	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   1.256 -
   1.257 -	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
   1.258 -		md = p;
   1.259 -		if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
   1.260 -			if (md->attribute & EFI_MEMORY_UC)
   1.261 -				return md;
   1.262 -		}
   1.263 -	}
   1.264 -	return 0;
   1.265 -}
   1.266 -#endif
   1.267 -
   1.268  u32
   1.269  efi_mem_type (unsigned long phys_addr)
   1.270  {
   1.271 @@ -934,3 +747,228 @@ efi_uart_console_only(void)
   1.272  	printk(KERN_ERR "Malformed %s value\n", name);
   1.273  	return 0;
   1.274  }
   1.275 +
   1.276 +#define efi_md_size(md)	(md->num_pages << EFI_PAGE_SHIFT)
   1.277 +
   1.278 +static inline u64
   1.279 +kmd_end(kern_memdesc_t *kmd)
   1.280 +{
   1.281 +	return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
   1.282 +}
   1.283 +
   1.284 +static inline u64
   1.285 +efi_md_end(efi_memory_desc_t *md)
   1.286 +{
   1.287 +	return (md->phys_addr + efi_md_size(md));
   1.288 +}
   1.289 +
   1.290 +static inline int
   1.291 +efi_wb(efi_memory_desc_t *md)
   1.292 +{
   1.293 +	return (md->attribute & EFI_MEMORY_WB);
   1.294 +}
   1.295 +
   1.296 +static inline int
   1.297 +efi_uc(efi_memory_desc_t *md)
   1.298 +{
   1.299 +	return (md->attribute & EFI_MEMORY_UC);
   1.300 +}
   1.301 +
   1.302 +/*
   1.303 + * Look for the first granule-aligned memory descriptor that is big
   1.304 + * enough to hold the EFI memory map. Make sure this descriptor is at
   1.305 + * least granule sized so it does not get trimmed.
   1.306 + */
   1.307 +struct kern_memdesc *
   1.308 +find_memmap_space (void)
   1.309 +{
   1.310 +	u64	contig_low=0, contig_high=0;
   1.311 +	u64	as = 0, ae;
   1.312 +	void *efi_map_start, *efi_map_end, *p, *q;
   1.313 +	efi_memory_desc_t *md, *pmd = NULL, *check_md;
   1.314 +	u64	space_needed, efi_desc_size;
   1.315 +	unsigned long total_mem = 0;
   1.316 +
   1.317 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
   1.318 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   1.319 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   1.320 +
   1.321 +	/*
   1.322 +	 * Worst case: we need 3 kernel descriptors for each efi descriptor
   1.323 +	 * (if every entry has a WB part in the middle, and UC head and tail),
   1.324 +	 * plus one for the end marker.
   1.325 +	 */
   1.326 +	space_needed = sizeof(kern_memdesc_t) *
   1.327 +		(3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1);
   1.328 +
   1.329 +	for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
   1.330 +		md = p;
   1.331 +		if (!efi_wb(md)) {
   1.332 +			continue;
   1.333 +		}
   1.334 +		if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
   1.335 +			contig_low = GRANULEROUNDUP(md->phys_addr);
   1.336 +			contig_high = efi_md_end(md);
   1.337 +			for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
   1.338 +				check_md = q;
   1.339 +				if (!efi_wb(check_md))
   1.340 +					break;
   1.341 +				if (contig_high != check_md->phys_addr)
   1.342 +					break;
   1.343 +				contig_high = efi_md_end(check_md);
   1.344 +			}
   1.345 +			contig_high = GRANULEROUNDDOWN(contig_high);
   1.346 +		}
   1.347 +		if (!is_available_memory(md) || md->type == EFI_LOADER_DATA)
   1.348 +			continue;
   1.349 +
   1.350 +		/* Round ends inward to granule boundaries */
   1.351 +		as = max(contig_low, md->phys_addr);
   1.352 +		ae = min(contig_high, efi_md_end(md));
   1.353 +
   1.354 +		/* keep within max_addr= command line arg */
   1.355 +		ae = min(ae, max_addr);
   1.356 +		if (ae <= as)
   1.357 +			continue;
   1.358 +
   1.359 +		/* avoid going over mem= command line arg */
   1.360 +		if (total_mem + (ae - as) > mem_limit)
   1.361 +			ae -= total_mem + (ae - as) - mem_limit;
   1.362 +
   1.363 +		if (ae <= as)
   1.364 +			continue;
   1.365 +
   1.366 +		if (ae - as > space_needed)
   1.367 +			break;
   1.368 +	}
   1.369 +	if (p >= efi_map_end)
   1.370 +		panic("Can't allocate space for kernel memory descriptors");
   1.371 +
   1.372 +	return __va(as);
   1.373 +}
   1.374 +
   1.375 +/*
   1.376 + * Walk the EFI memory map and gather all memory available for kernel
   1.377 + * to use.  We can allocate partial granules only if the unavailable
   1.378 + * parts exist, and are WB.
   1.379 + */
   1.380 +void
   1.381 +efi_memmap_init(unsigned long *s, unsigned long *e)
   1.382 +{
   1.383 +	struct kern_memdesc *k, *prev = 0;
   1.384 +	u64	contig_low=0, contig_high=0;
   1.385 +	u64	as, ae, lim;
   1.386 +	void *efi_map_start, *efi_map_end, *p, *q;
   1.387 +	efi_memory_desc_t *md, *pmd = NULL, *check_md;
   1.388 +	u64	efi_desc_size;
   1.389 +	unsigned long total_mem = 0;
   1.390 +
   1.391 +	k = kern_memmap = find_memmap_space();
   1.392 +
   1.393 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
   1.394 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
   1.395 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
   1.396 +
   1.397 +	for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
   1.398 +		md = p;
   1.399 +		if (!efi_wb(md)) {
   1.400 +			if (efi_uc(md) && (md->type == EFI_CONVENTIONAL_MEMORY ||
   1.401 +				    	   md->type == EFI_BOOT_SERVICES_DATA)) {
   1.402 +				k->attribute = EFI_MEMORY_UC;
   1.403 +				k->start = md->phys_addr;
   1.404 +				k->num_pages = md->num_pages;
   1.405 +				k++;
   1.406 +			}
   1.407 +			continue;
   1.408 +		}
   1.409 +#ifdef XEN
   1.410 +// this works around a problem in the ski bootloader
   1.411 +{
   1.412 +		extern long running_on_sim;
   1.413 +		if (running_on_sim && md->type != EFI_CONVENTIONAL_MEMORY)
   1.414 +			continue;
   1.415 +}
   1.416 +#endif
   1.417 +		if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
   1.418 +			contig_low = GRANULEROUNDUP(md->phys_addr);
   1.419 +			contig_high = efi_md_end(md);
   1.420 +			for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
   1.421 +				check_md = q;
   1.422 +				if (!efi_wb(check_md))
   1.423 +					break;
   1.424 +				if (contig_high != check_md->phys_addr)
   1.425 +					break;
   1.426 +				contig_high = efi_md_end(check_md);
   1.427 +			}
   1.428 +			contig_high = GRANULEROUNDDOWN(contig_high);
   1.429 +		}
   1.430 +		if (!is_available_memory(md))
   1.431 +			continue;
   1.432 +
   1.433 +		/*
   1.434 +		 * Round ends inward to granule boundaries
   1.435 +		 * Give trimmings to uncached allocator
   1.436 +		 */
   1.437 +		if (md->phys_addr < contig_low) {
   1.438 +			lim = min(efi_md_end(md), contig_low);
   1.439 +			if (efi_uc(md)) {
   1.440 +				if (k > kern_memmap && (k-1)->attribute == EFI_MEMORY_UC &&
   1.441 +				    kmd_end(k-1) == md->phys_addr) {
   1.442 +					(k-1)->num_pages += (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
   1.443 +				} else {
   1.444 +					k->attribute = EFI_MEMORY_UC;
   1.445 +					k->start = md->phys_addr;
   1.446 +					k->num_pages = (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
   1.447 +					k++;
   1.448 +				}
   1.449 +			}
   1.450 +			as = contig_low;
   1.451 +		} else
   1.452 +			as = md->phys_addr;
   1.453 +
   1.454 +		if (efi_md_end(md) > contig_high) {
   1.455 +			lim = max(md->phys_addr, contig_high);
   1.456 +			if (efi_uc(md)) {
   1.457 +				if (lim == md->phys_addr && k > kern_memmap &&
   1.458 +				    (k-1)->attribute == EFI_MEMORY_UC &&
   1.459 +				    kmd_end(k-1) == md->phys_addr) {
   1.460 +					(k-1)->num_pages += md->num_pages;
   1.461 +				} else {
   1.462 +					k->attribute = EFI_MEMORY_UC;
   1.463 +					k->start = lim;
   1.464 +					k->num_pages = (efi_md_end(md) - lim) >> EFI_PAGE_SHIFT;
   1.465 +					k++;
   1.466 +				}
   1.467 +			}
   1.468 +			ae = contig_high;
   1.469 +		} else
   1.470 +			ae = efi_md_end(md);
   1.471 +
   1.472 +		/* keep within max_addr= command line arg */
   1.473 +		ae = min(ae, max_addr);
   1.474 +		if (ae <= as)
   1.475 +			continue;
   1.476 +
   1.477 +		/* avoid going over mem= command line arg */
   1.478 +		if (total_mem + (ae - as) > mem_limit)
   1.479 +			ae -= total_mem + (ae - as) - mem_limit;
   1.480 +
   1.481 +		if (ae <= as)
   1.482 +			continue;
   1.483 +		if (prev && kmd_end(prev) == md->phys_addr) {
   1.484 +			prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT;
   1.485 +			total_mem += ae - as;
   1.486 +			continue;
   1.487 +		}
   1.488 +		k->attribute = EFI_MEMORY_WB;
   1.489 +		k->start = as;
   1.490 +		k->num_pages = (ae - as) >> EFI_PAGE_SHIFT;
   1.491 +		total_mem += ae - as;
   1.492 +		prev = k++;
   1.493 +	}
   1.494 +	k->start = ~0L; /* end-marker */
   1.495 +
   1.496 +	/* reserve the memory we are using for kern_memmap */
   1.497 +	*s = (u64)kern_memmap;
   1.498 +	*e = (u64)++k;
   1.499 +}
     2.1 --- a/xen/arch/ia64/linux-xen/setup.c	Fri Jun 09 10:40:31 2006 -0600
     2.2 +++ b/xen/arch/ia64/linux-xen/setup.c	Tue Jun 13 08:45:22 2006 -0600
     2.3 @@ -249,6 +249,9 @@ reserve_memory (void)
     2.4  	}
     2.5  #endif
     2.6  
     2.7 +	efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
     2.8 +	n++;
     2.9 +
    2.10  	/* end of memory marker */
    2.11  	rsvd_region[n].start = ~0UL;
    2.12  	rsvd_region[n].end   = ~0UL;
     3.1 --- a/xen/arch/ia64/xen/xensetup.c	Fri Jun 09 10:40:31 2006 -0600
     3.2 +++ b/xen/arch/ia64/xen/xensetup.c	Tue Jun 13 08:45:22 2006 -0600
     3.3 @@ -90,20 +90,6 @@ xen_count_pages(u64 start, u64 end, void
     3.4      return 0;
     3.5  }
     3.6  
     3.7 -/* Find first hole after trunk for xen image */
     3.8 -static int
     3.9 -xen_find_first_hole(u64 start, u64 end, void *arg)
    3.10 -{
    3.11 -    unsigned long *first_hole = arg;
    3.12 -
    3.13 -    if ((*first_hole) == 0) {
    3.14 -	if ((start <= KERNEL_START) && (KERNEL_START < end))
    3.15 -	    *first_hole = __pa(end);
    3.16 -    }
    3.17 -
    3.18 -    return 0;
    3.19 -}
    3.20 -
    3.21  static void __init do_initcalls(void)
    3.22  {
    3.23      initcall_t *call;
    3.24 @@ -197,15 +183,64 @@ efi_print(void)
    3.25      }
    3.26  }
    3.27  
    3.28 +/*
    3.29 + * These are utility functions for getting and testing
    3.30 + * memory descriptors when allocating the xenheap area.
    3.31 + */
    3.32 +static efi_memory_desc_t *
    3.33 +efi_get_md (unsigned long phys_addr)
    3.34 +{
    3.35 +    void *efi_map_start, *efi_map_end, *p;
    3.36 +    efi_memory_desc_t *md;
    3.37 +    u64 efi_desc_size;
    3.38 +
    3.39 +    efi_map_start = __va(ia64_boot_param->efi_memmap);
    3.40 +    efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
    3.41 +    efi_desc_size = ia64_boot_param->efi_memdesc_size;
    3.42 +
    3.43 +    for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
    3.44 +        md = p;
    3.45 +        if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
    3.46 +            return md;
    3.47 +    }
    3.48 +    return 0;
    3.49 +}
    3.50 +
    3.51 +static int
    3.52 +is_xenheap_usable_memory(efi_memory_desc_t *md)
    3.53 +{
    3.54 +    if (!(md->attribute & EFI_MEMORY_WB))
    3.55 +        return 0;
    3.56 +
    3.57 +    switch (md->type) {
    3.58 +        case EFI_LOADER_CODE:
    3.59 +        case EFI_LOADER_DATA:
    3.60 +        case EFI_BOOT_SERVICES_CODE:
    3.61 +        case EFI_BOOT_SERVICES_DATA:
    3.62 +        case EFI_CONVENTIONAL_MEMORY:
    3.63 +            return 1;
    3.64 +    }
    3.65 +    return 0;
    3.66 +}
    3.67 +
    3.68 +static inline int
    3.69 +md_overlaps(efi_memory_desc_t *md, unsigned long phys_addr)
    3.70 +{
    3.71 +    return (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT));
    3.72 +}
    3.73 +
    3.74 +#define MD_SIZE(md) (md->num_pages << EFI_PAGE_SHIFT)
    3.75 +
    3.76  void start_kernel(void)
    3.77  {
    3.78      unsigned char *cmdline;
    3.79      void *heap_start;
    3.80 -    unsigned long nr_pages, firsthole_start;
    3.81 +    unsigned long nr_pages;
    3.82      unsigned long dom0_memory_start, dom0_memory_size;
    3.83      unsigned long dom0_initrd_start, dom0_initrd_size;
    3.84 -    unsigned long initial_images_start, initial_images_end;
    3.85 +    unsigned long md_end, relo_start, relo_end, relo_size = 0;
    3.86      struct domain *idle_domain;
    3.87 +    efi_memory_desc_t *kern_md, *last_md, *md;
    3.88  #ifdef CONFIG_SMP
    3.89      int i;
    3.90  #endif
    3.91 @@ -230,67 +265,111 @@ void start_kernel(void)
    3.92      init_console();
    3.93      set_printk_prefix("(XEN) ");
    3.94  
    3.95 +    if (running_on_sim || ia64_boot_param->domain_start == 0 ||
    3.96 +                          ia64_boot_param->domain_size == 0) {
    3.97 +        /* This is possible only with the old elilo, which does not support
    3.98 +           a vmm.  Fix now, and continue without initrd.  */
    3.99 +        printk ("Your elilo is not Xen-aware.  Bootparams fixed\n");
   3.100 +        ia64_boot_param->domain_start = ia64_boot_param->initrd_start;
   3.101 +        ia64_boot_param->domain_size = ia64_boot_param->initrd_size;
   3.102 +        ia64_boot_param->initrd_start = 0;
   3.103 +        ia64_boot_param->initrd_size = 0;
   3.104 +    }
   3.105 +
   3.106      /* xenheap should be in same TR-covered range with xen image */
   3.107      xenheap_phys_end = xen_pstart + xenheap_size;
   3.108      printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n",
   3.109 -	    xen_pstart, xenheap_phys_end);
   3.110 +           xen_pstart, xenheap_phys_end);
   3.111 +
   3.112 +    kern_md = md = efi_get_md(xen_pstart);
   3.113 +    md_end = __pa(ia64_imva(&_end));
   3.114 +    relo_start = xenheap_phys_end;
   3.115 +
   3.116 +    /*
   3.117 +     * Scan through the memory descriptors after the kernel
   3.118 +     * image to make sure we have enough room for the xenheap
   3.119 +     * area, pushing out whatever may already be there.
   3.120 +     */
   3.121 +    while (relo_start + relo_size >= md_end) {
   3.122 +        md = efi_get_md(md_end);
   3.123 +
   3.124 +        BUG_ON(!md);
   3.125 +        BUG_ON(!is_xenheap_usable_memory(md));
   3.126  
   3.127 -    /* Find next hole */
   3.128 -    firsthole_start = 0;
   3.129 -    efi_memmap_walk(xen_find_first_hole, &firsthole_start);
   3.130 +        md_end = md->phys_addr + MD_SIZE(md);
   3.131 +        /*
   3.132 +         * The dom0 kernel or initrd could overlap; reserve space
   3.133 +         * at the end to relocate them later.
   3.134 +         */
   3.135 +        if (md->type == EFI_LOADER_DATA) {
   3.136 +            /* Test for ranges we're not prepared to move */
   3.137 +            BUG_ON(md_overlaps(md, __pa(ia64_boot_param)) ||
   3.138 +                   md_overlaps(md, ia64_boot_param->efi_memmap) ||
   3.139 +                   md_overlaps(md, ia64_boot_param->command_line));
   3.140 +
   3.141 +            relo_size += MD_SIZE(md);
   3.142 +            /* If range overlaps the end, push out the relocation start */
   3.143 +            if (md_end > relo_start)
   3.144 +                relo_start = md_end;
   3.145 +        }
   3.146 +    }
   3.147 +    last_md = md;
   3.148 +    relo_end = relo_start + relo_size;
   3.149  
   3.150 -    if (running_on_sim || ia64_boot_param->domain_start == 0
   3.151 -	|| ia64_boot_param->domain_size == 0) {
   3.152 -	    /* This is possible only with the old elilo, which does not support
   3.153 -	       a vmm.  Fix now, and continue without initrd.  */
   3.154 -	    printk ("Your elilo is not Xen-aware.  Bootparams fixed\n");
   3.155 -	    ia64_boot_param->domain_start = ia64_boot_param->initrd_start;
   3.156 -	    ia64_boot_param->domain_size = ia64_boot_param->initrd_size;
   3.157 -	    ia64_boot_param->initrd_start = 0;
   3.158 -	    ia64_boot_param->initrd_size = 0;
   3.159 +    md_end = __pa(ia64_imva(&_end));
   3.160 + 
   3.161 +    /*
   3.162 +     * Move any relocated data out into the previously found relocation
   3.163 +     * area.  Any extra memory descriptors are moved out to the end
   3.164 +     * and set to zero pages.
   3.165 +     */
   3.166 +    for (md = efi_get_md(md_end) ;; md = efi_get_md(md_end)) {
   3.167 +        md_end = md->phys_addr + MD_SIZE(md);
   3.168 +
   3.169 +        if (md->type == EFI_LOADER_DATA) {
   3.170 +            unsigned long relo_offset;
   3.171 +
   3.172 +            if (md_overlaps(md, ia64_boot_param->domain_start)) {
   3.173 +                relo_offset = ia64_boot_param->domain_start - md->phys_addr;
   3.174 +                printk("Moving Dom0 kernel image: 0x%lx -> 0x%lx (%ld KiB)\n",
   3.175 +                       ia64_boot_param->domain_start, relo_start + relo_offset,
   3.176 +                       ia64_boot_param->domain_size >> 10);
   3.177 +                ia64_boot_param->domain_start = relo_start + relo_offset;
   3.178 +            }
   3.179 +            if (ia64_boot_param->initrd_size &&
   3.180 +                md_overlaps(md, ia64_boot_param->initrd_start)) {
   3.181 +                relo_offset = ia64_boot_param->initrd_start - md->phys_addr;
   3.182 +                printk("Moving Dom0 initrd image: 0x%lx -> 0x%lx (%ld KiB)\n",
   3.183 +                       ia64_boot_param->initrd_start, relo_start + relo_offset,
   3.184 +                       ia64_boot_param->initrd_size >> 10);
   3.185 +                ia64_boot_param->initrd_start = relo_start + relo_offset;
   3.186 +            }
   3.187 +            memcpy(__va(relo_start), __va(md->phys_addr), MD_SIZE(md));
   3.188 +            relo_start += MD_SIZE(md);
   3.189 +        }
   3.190 +
   3.191 +        if (md == kern_md)
   3.192 +            continue;
   3.193 +        if (md == last_md)
   3.194 +            break;
   3.195 +
   3.196 +        md->phys_addr = relo_end;
   3.197 +        md->num_pages = 0;
   3.198      }
   3.199  
   3.200 -    initial_images_start = xenheap_phys_end;
   3.201 -    initial_images_end = initial_images_start +
   3.202 -       PAGE_ALIGN(ia64_boot_param->domain_size);
   3.203 -
   3.204 -    /* also reserve space for initrd */
   3.205 -    if (ia64_boot_param->initrd_start && ia64_boot_param->initrd_size)
   3.206 -       initial_images_end += PAGE_ALIGN(ia64_boot_param->initrd_size);
   3.207 -    else {
   3.208 -       /* sanity cleanup */
   3.209 -       ia64_boot_param->initrd_size = 0;
   3.210 -       ia64_boot_param->initrd_start = 0;
   3.211 -    }
   3.212 -
   3.213 +    /* Trim the last entry */
   3.214 +    md->phys_addr = relo_end;
   3.215 +    md->num_pages = (md_end - relo_end) >> EFI_PAGE_SHIFT;
   3.216  
   3.217 -    /* Later may find another memory trunk, even away from xen image... */
   3.218 -    if (initial_images_end > firsthole_start) {
   3.219 -	printk("Not enough memory to stash the DOM0 kernel image.\n");
   3.220 -	printk("First hole:0x%lx, relocation end: 0x%lx\n",
   3.221 -		firsthole_start, initial_images_end);
   3.222 -	for ( ; ; );
   3.223 -    }
   3.224 -
   3.225 -    /* This copy is time consuming, but elilo may load Dom0 image
   3.226 -     * within xenheap range */
   3.227 -    printk("ready to move Dom0 to 0x%lx with len %lx...", initial_images_start,
   3.228 -          ia64_boot_param->domain_size);
   3.229 +    /*
   3.230 +     * Expand the new kernel/xenheap (and maybe dom0/initrd) out to
   3.231 +     * the full size.  This range will already be type EFI_LOADER_DATA,
   3.232 +     * therefore the xenheap area is now protected from being allocated
   3.233 +     * for use by find_memmap_space() in efi.c.
   3.234 +     */
   3.235 +    kern_md->num_pages = (relo_end - kern_md->phys_addr) >> EFI_PAGE_SHIFT;
   3.236  
   3.237 -    memmove(__va(initial_images_start),
   3.238 -          __va(ia64_boot_param->domain_start),
   3.239 -          ia64_boot_param->domain_size);
   3.240 -    ia64_boot_param->domain_start = initial_images_start;
   3.241 -
   3.242 -    printk("ready to move initrd to 0x%lx with len %lx...",
   3.243 -          initial_images_start+PAGE_ALIGN(ia64_boot_param->domain_size),
   3.244 -          ia64_boot_param->initrd_size);
   3.245 -    memmove(__va(initial_images_start+PAGE_ALIGN(ia64_boot_param->domain_size)),
   3.246 -	   __va(ia64_boot_param->initrd_start),
   3.247 -	   ia64_boot_param->initrd_size);
   3.248 -    printk("Done\n");
   3.249 -    ia64_boot_param->initrd_start = initial_images_start +
   3.250 -	PAGE_ALIGN(ia64_boot_param->domain_size);
   3.251 +    reserve_memory();
   3.252  
   3.253      /* first find highest page frame number */
   3.254      max_page = 0;
   3.255 @@ -310,8 +389,6 @@ void start_kernel(void)
   3.256      heap_start = __va(init_boot_allocator(__pa(heap_start)));
   3.257      printf("After heap_start: %p\n", heap_start);
   3.258  
   3.259 -    reserve_memory();
   3.260 -
   3.261      efi_memmap_walk(filter_rsvd_memory, init_boot_pages);
   3.262      efi_memmap_walk(xen_count_pages, &nr_pages);
   3.263  
   3.264 @@ -417,7 +494,7 @@ printk("About to call domain_create()\n"
   3.265       * above our heap. The second module, if present, is an initrd ramdisk.
   3.266       */
   3.267      printk("About to call construct_dom0()\n");
   3.268 -    dom0_memory_start = (unsigned long) __va(initial_images_start);
   3.269 +    dom0_memory_start = (unsigned long) __va(ia64_boot_param->domain_start);
   3.270      dom0_memory_size = ia64_boot_param->domain_size;
   3.271      dom0_initrd_start = (unsigned long) __va(ia64_boot_param->initrd_start);
   3.272      dom0_initrd_size = ia64_boot_param->initrd_size;
     4.1 --- a/xen/include/asm-ia64/linux-xen/asm/meminit.h	Fri Jun 09 10:40:31 2006 -0600
     4.2 +++ b/xen/include/asm-ia64/linux-xen/asm/meminit.h	Tue Jun 13 08:45:22 2006 -0600
     4.3 @@ -22,13 +22,14 @@
     4.4   * 	- dom0 code & data
     4.5   * 	- initrd (optional)
     4.6  #endif
     4.7 + * 	- Kernel memory map built from EFI memory map
     4.8   *
     4.9   * More could be added if necessary
    4.10   */
    4.11  #ifndef XEN
    4.12 -#define IA64_MAX_RSVD_REGIONS 5
    4.13 +#define IA64_MAX_RSVD_REGIONS 6
    4.14  #else
    4.15 -#define IA64_MAX_RSVD_REGIONS 6
    4.16 +#define IA64_MAX_RSVD_REGIONS 7
    4.17  #endif
    4.18  
    4.19  struct rsvd_region {
    4.20 @@ -43,6 +44,7 @@ extern void find_memory (void);
    4.21  extern void reserve_memory (void);
    4.22  extern void find_initrd (void);
    4.23  extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
    4.24 +extern void efi_memmap_init(unsigned long *, unsigned long *);
    4.25  
    4.26  /*
    4.27   * For rounding an address to the next IA64_GRANULE_SIZE or order
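
The new code rounds extents to granule boundaries with GRANULEROUNDUP()
and GRANULEROUNDDOWN(), which this header provides. For reference, a
sketch of those helpers, assuming the usual ia64 definitions (they are
not part of this patch):

    #define GRANULEROUNDDOWN(n) ((n) & ~(IA64_GRANULE_SIZE-1))
    #define GRANULEROUNDUP(n)   (((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1))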