ia64/xen-unstable

changeset 10144:953753661a3b

[IA64] make xenLinux/ia64 privcmd mmap not to use dom0 memory

xenLinux/ia64 privcmd mmap uses pseudo physical address space.
It used alloc_pages() to allocate the space.
This wastes dom0 memory, and sometimes several hundred megabytes are
allocated depending on domU memory size.
With this patch xenLinux/ia64 tries to find a region which can be
used safely and uses that region.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author awilliam@xenbuild.aw
date Tue May 23 15:09:21 2006 -0600 (2006-05-23)
parents f0f88d9c4c9e
children 0dabd651b856
files linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c	Tue May 23 09:17:57 2006 -0600
     1.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c	Tue May 23 15:09:21 2006 -0600
     1.3 @@ -360,116 +360,132 @@ struct address_space xen_ia64_foreign_du
     1.4  
     1.5  ///////////////////////////////////////////////////////////////////////////
     1.6  // foreign mapping
     1.7 +#include <linux/efi.h>
     1.8 +#include <asm/meminit.h> // for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}()
     1.9  
    1.10 -struct xen_ia64_privcmd_entry {
    1.11 -	atomic_t	map_count;
    1.12 -	struct page*	page;
    1.13 -};
    1.14 +static unsigned long privcmd_resource_min = 0;
    1.15 +// Xen/ia64 currently can handle pseudo physical address bits up to
    1.16 +// (PAGE_SHIFT * 3)
    1.17 +static unsigned long privcmd_resource_max = GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1);
    1.18 +static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE;
    1.19  
    1.20 -static void
    1.21 -xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
    1.22 +static unsigned long
    1.23 +md_end_addr(const efi_memory_desc_t *md)
    1.24  {
    1.25 -	atomic_set(&entry->map_count, 0);
    1.26 -	entry->page = NULL;
    1.27 +	return md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
    1.28 +}
    1.29 +
    1.30 +#define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE	(1024 * 1024 * 1024UL)
    1.31 +static int
    1.32 +xen_ia64_privcmd_check_size(unsigned long start, unsigned long end)
    1.33 +{
    1.34 +	return (start < end &&
    1.35 +		(end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE);
    1.36  }
    1.37  
    1.38 -//TODO alloc_page() to allocate pseudo physical address space is 
    1.39 -//     waste of memory.
    1.40 -//     When vti domain is created, qemu maps all of vti domain pages which 
    1.41 -//     reaches to several hundred megabytes at least.
    1.42 -//     remove alloc_page().
    1.43 -static int
    1.44 -xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
    1.45 -			    unsigned long addr,
    1.46 -			    struct xen_ia64_privcmd_entry* entry,
    1.47 -			    unsigned long mfn,
    1.48 -			    pgprot_t prot,
    1.49 -			    domid_t domid)
    1.50 +static int __init
    1.51 +xen_ia64_privcmd_init(void)
    1.52  {
    1.53 -	int error = 0;
    1.54 -	struct page* page;
    1.55 -	unsigned long gpfn;
    1.56 +	void *efi_map_start, *efi_map_end, *p;
    1.57 +	u64 efi_desc_size;
    1.58 +	efi_memory_desc_t *md;
    1.59 +	unsigned long tmp_min;
    1.60 +	unsigned long tmp_max;
    1.61 +	unsigned long gap_size;
    1.62 +	unsigned long prev_end;
    1.63  
    1.64 -	BUG_ON((addr & ~PAGE_MASK) != 0);
    1.65 -	BUG_ON(mfn == INVALID_MFN);
    1.66 +	if (!is_running_on_xen())
    1.67 +		return -1;
    1.68  
    1.69 -	if (entry->page != NULL) {
    1.70 -		error = -EBUSY;
    1.71 -		goto out;
    1.72 +	efi_map_start = __va(ia64_boot_param->efi_memmap);
    1.73 +	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
    1.74 +	efi_desc_size = ia64_boot_param->efi_memdesc_size;
    1.75 +
    1.76 +	// at first check the used highest address
    1.77 +	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
    1.78 +		// nothing
    1.79  	}
    1.80 -	page = alloc_page(GFP_KERNEL);
    1.81 -	if (page == NULL) {
    1.82 -		error = -ENOMEM;
    1.83 -		goto out;
    1.84 -	}
    1.85 -	gpfn = page_to_pfn(page);
    1.86 -
    1.87 -	error = HYPERVISOR_add_physmap(gpfn, mfn, 0/* prot:XXX */,
    1.88 -				       domid);
    1.89 -	if (error != 0) {
    1.90 +	md = p - efi_desc_size;
    1.91 +	privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md));
    1.92 +	if (xen_ia64_privcmd_check_size(privcmd_resource_min,
    1.93 +					privcmd_resource_max)) {
    1.94  		goto out;
    1.95  	}
    1.96  
    1.97 -	prot = vma->vm_page_prot;
    1.98 -	error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
    1.99 -	if (error != 0) {
   1.100 -		(void)HYPERVISOR_zap_physmap(gpfn, 0);
   1.101 -		error = HYPERVISOR_populate_physmap(gpfn, 0, 0);
   1.102 -		if (error) {
   1.103 -			BUG();//XXX
   1.104 +	// the used highest address is too large. try to find the largest gap.
   1.105 +	tmp_min = privcmd_resource_max;
   1.106 +	tmp_max = 0;
   1.107 +	gap_size = 0;
   1.108 +	prev_end = 0;
   1.109 +	for (p = efi_map_start;
   1.110 +	     p < efi_map_end - efi_desc_size;
   1.111 +	     p += efi_desc_size) {
   1.112 +		unsigned long end;
   1.113 +		efi_memory_desc_t* next;
   1.114 +		unsigned long next_start;
   1.115 +
   1.116 +		md = p;
   1.117 +		end = md_end_addr(md);
   1.118 +		if (end > privcmd_resource_max) {
   1.119 +			break;
   1.120 +		}
   1.121 +		if (end < prev_end) {
   1.122 +			// work around. 
   1.123 +			// Xen may pass incompletely sorted memory
   1.124 +			// descriptors like
   1.125 +			// [x, x + length]
   1.126 +			// [x, x]
   1.127 +			// this order should be reversed.
   1.128 +			continue;
   1.129  		}
   1.130 -		__free_page(page);
   1.131 -	} else {
   1.132 -		atomic_inc(&entry->map_count);
   1.133 -		entry->page = page;
   1.134 +		next = p + efi_desc_size;
   1.135 +		next_start = next->phys_addr;
   1.136 +		if (next_start > privcmd_resource_max) {
   1.137 +			next_start = privcmd_resource_max;
   1.138 +		}
   1.139 +		if (end < next_start && gap_size < (next_start - end)) {
   1.140 +			tmp_min = end;
   1.141 +			tmp_max = next_start;
   1.142 +			gap_size = tmp_max - tmp_min;
   1.143 +		}
   1.144 +		prev_end = end;
   1.145 +	}
   1.146 +
   1.147 +	privcmd_resource_min = GRANULEROUNDUP(tmp_min);
   1.148 +	if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) {
   1.149 +		privcmd_resource_max = tmp_max;
   1.150 +		goto out;
   1.151 +	}
   1.152 +
   1.153 +	privcmd_resource_min = tmp_min;
   1.154 +	privcmd_resource_max = tmp_max;
   1.155 +	if (!xen_ia64_privcmd_check_size(privcmd_resource_min,
   1.156 +					 privcmd_resource_max)) {
   1.157 +		// Any large enough gap isn't found.
   1.158 +		// go ahead anyway with the warning hoping that large region
   1.159 +		// won't be requested.
   1.160 +		printk(KERN_WARNING "xen privcmd: large enough region for privcmd mmap is not found.\n");
   1.161  	}
   1.162  
   1.163  out:
   1.164 -	return error;
   1.165 +	printk(KERN_INFO "xen privcmd uses pseudo physical addr range [0x%lx, 0x%lx] (%ldMB)\n",
   1.166 +	       privcmd_resource_min, privcmd_resource_max, 
   1.167 +	       (privcmd_resource_max - privcmd_resource_min) >> 20);
   1.168 +	BUG_ON(privcmd_resource_min >= privcmd_resource_max);
   1.169 +	return 0;
   1.170  }
   1.171 -
   1.172 -static void
   1.173 -xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_entry* entry)
   1.174 -{
   1.175 -	struct page* page = entry->page;
   1.176 -	unsigned long gpfn = page_to_pfn(page);
   1.177 -	int error;
   1.178 -
   1.179 -	error = HYPERVISOR_zap_physmap(gpfn, 0);
   1.180 -	if (error) {
   1.181 -		BUG();//XXX
   1.182 -	}
   1.183 -
   1.184 -	error = HYPERVISOR_populate_physmap(gpfn, 0, 0);
   1.185 -	if (error) {
   1.186 -		BUG();//XXX
   1.187 -	}
   1.188 +late_initcall(xen_ia64_privcmd_init);
   1.189  
   1.190 -	entry->page = NULL;
   1.191 -	__free_page(page);
   1.192 -}
   1.193 -
   1.194 -static int
   1.195 -xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_entry* entry)
   1.196 -{
   1.197 -	if (entry->page != NULL) {
   1.198 -		atomic_inc(&entry->map_count);
   1.199 -	} else {
   1.200 -		BUG_ON(atomic_read(&entry->map_count) != 0);
   1.201 -	}
   1.202 -}
   1.203 -
   1.204 -static int
   1.205 -xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_entry* entry)
   1.206 -{
   1.207 -	if (entry->page != NULL && atomic_dec_and_test(&entry->map_count)) {
   1.208 -		xen_ia64_privcmd_entry_munmap(entry);
   1.209 -	}
   1.210 -}
   1.211 +struct xen_ia64_privcmd_entry {
   1.212 +	atomic_t	map_count;
   1.213 +#define INVALID_GPFN	(~0UL)
   1.214 +	unsigned long	gpfn;
   1.215 +};
   1.216  
   1.217  struct xen_ia64_privcmd_range {
   1.218  	atomic_t			ref_count;
   1.219  	unsigned long			pgoff; // in PAGE_SIZE
   1.220 +	struct resource*		res;
   1.221  
   1.222  	unsigned long			num_entries;
   1.223  	struct xen_ia64_privcmd_entry	entries[0];
   1.224 @@ -482,6 +498,97 @@ struct xen_ia64_privcmd_vma {
   1.225  	struct xen_ia64_privcmd_entry*	entries;
   1.226  };
   1.227  
   1.228 +static void
   1.229 +xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
   1.230 +{
   1.231 +	atomic_set(&entry->map_count, 0);
   1.232 +	entry->gpfn = INVALID_GPFN;
   1.233 +}
   1.234 +
   1.235 +static int
   1.236 +xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
   1.237 +			    unsigned long addr,
   1.238 +			    struct xen_ia64_privcmd_range* privcmd_range,
   1.239 +			    int i,
   1.240 +			    unsigned long mfn,
   1.241 +			    pgprot_t prot,
   1.242 +			    domid_t domid)
   1.243 +{
   1.244 +	int error = 0;
   1.245 +	struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
   1.246 +	unsigned long gpfn;
   1.247 +
   1.248 +	BUG_ON((addr & ~PAGE_MASK) != 0);
   1.249 +	BUG_ON(mfn == INVALID_MFN);
   1.250 +
   1.251 +	if (entry->gpfn != INVALID_GPFN) {
   1.252 +		error = -EBUSY;
   1.253 +		goto out;
   1.254 +	}
   1.255 +	gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i;
   1.256 +
   1.257 +	error = HYPERVISOR_add_physmap(gpfn, mfn, 0/* prot:XXX */,
   1.258 +				       domid);
   1.259 +	if (error != 0) {
   1.260 +		goto out;
   1.261 +	}
   1.262 +
   1.263 +	prot = vma->vm_page_prot;
   1.264 +	error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
   1.265 +	if (error != 0) {
   1.266 +		error = HYPERVISOR_zap_physmap(gpfn, 0);
   1.267 +		if (error) {
   1.268 +			BUG();//XXX
   1.269 +		}
   1.270 +	} else {
   1.271 +		atomic_inc(&entry->map_count);
   1.272 +		entry->gpfn = gpfn;
   1.273 +	}
   1.274 +
   1.275 +out:
   1.276 +	return error;
   1.277 +}
   1.278 +
   1.279 +static void
   1.280 +xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range* privcmd_range,
   1.281 +			      int i)
   1.282 +{
   1.283 +	struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
   1.284 +	unsigned long gpfn = entry->gpfn;
   1.285 +	//gpfn = (privcmd_range->res->start >> PAGE_SHIFT) +
   1.286 +	//	(vma->vm_pgoff - privcmd_range->pgoff);
   1.287 +	int error;
   1.288 +
   1.289 +	error = HYPERVISOR_zap_physmap(gpfn, 0);
   1.290 +	if (error) {
   1.291 +		BUG();//XXX
   1.292 +	}
   1.293 +	entry->gpfn = INVALID_GPFN;
   1.294 +}
   1.295 +
   1.296 +static int
   1.297 +xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range* privcmd_range,
   1.298 +			    int i)
   1.299 +{
   1.300 +	struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
   1.301 +	if (entry->gpfn != INVALID_GPFN) {
   1.302 +		atomic_inc(&entry->map_count);
   1.303 +	} else {
   1.304 +		BUG_ON(atomic_read(&entry->map_count) != 0);
   1.305 +	}
   1.306 +}
   1.307 +
   1.308 +static int
   1.309 +xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range* privcmd_range,
   1.310 +			     int i)
   1.311 +{
   1.312 +	struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
   1.313 +	if (entry->gpfn != INVALID_GPFN &&
   1.314 +	    atomic_dec_and_test(&entry->map_count)) {
   1.315 +		xen_ia64_privcmd_entry_munmap(privcmd_range, i);
   1.316 +	}
   1.317 +}
   1.318 +
   1.319  static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
   1.320  static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);
   1.321  
   1.322 @@ -507,7 +614,7 @@ static void
   1.323  	privcmd_vma->entries = &privcmd_range->entries[entry_offset];
   1.324  	vma->vm_private_data = privcmd_vma;
   1.325  	for (i = 0; i < privcmd_vma->num_entries; i++) {
   1.326 -		xen_ia64_privcmd_entry_open(&privcmd_vma->entries[i]);
   1.327 +		xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i);
   1.328  	}
   1.329  
   1.330  	vma->vm_private_data = privcmd_vma;
   1.331 @@ -533,10 +640,11 @@ xen_ia64_privcmd_vma_close(struct vm_are
   1.332  	struct xen_ia64_privcmd_vma* privcmd_vma =
   1.333  		(struct xen_ia64_privcmd_vma*)vma->vm_private_data;
   1.334  	struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
   1.335 +	unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
   1.336  	unsigned long i;
   1.337  
   1.338  	for (i = 0; i < privcmd_vma->num_entries; i++) {
   1.339 -		xen_ia64_privcmd_entry_close(&privcmd_vma->entries[i]);
   1.340 +		xen_ia64_privcmd_entry_close(privcmd_range, entry_offset + i);
   1.341  	}
   1.342  	vma->vm_private_data = NULL;
   1.343  	kfree(privcmd_vma);
   1.344 @@ -547,9 +655,11 @@ xen_ia64_privcmd_vma_close(struct vm_are
   1.345  			struct xen_ia64_privcmd_entry* entry =
   1.346  				&privcmd_range->entries[i];
   1.347  			BUG_ON(atomic_read(&entry->map_count) != 0);
   1.348 -			BUG_ON(entry->page != NULL);
   1.349 +			BUG_ON(entry->gpfn != INVALID_GPFN);
   1.350  		}
   1.351  #endif
   1.352 +		release_resource(privcmd_range->res);
   1.353 +		kfree(privcmd_range->res);
   1.354  		vfree(privcmd_range);
   1.355  	}
   1.356  }
   1.357 @@ -557,13 +667,18 @@ xen_ia64_privcmd_vma_close(struct vm_are
   1.358  int
   1.359  privcmd_mmap(struct file * file, struct vm_area_struct * vma)
   1.360  {
   1.361 -	unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
   1.362 -	struct xen_ia64_privcmd_range* privcmd_range;
   1.363 -	struct xen_ia64_privcmd_vma* privcmd_vma;
   1.364 +	int error;
   1.365 +	unsigned long size = vma->vm_end - vma->vm_start;
   1.366 +	unsigned long num_entries = size >> PAGE_SHIFT;
   1.367 +	struct xen_ia64_privcmd_range* privcmd_range = NULL;
   1.368 +	struct xen_ia64_privcmd_vma* privcmd_vma = NULL;
   1.369 +	struct resource* res = NULL;
   1.370  	unsigned long i;
   1.371  	BUG_ON(!running_on_xen);
   1.372  
   1.373  	BUG_ON(file->private_data != NULL);
   1.374 +
   1.375 +	error = -ENOMEM;
   1.376  	privcmd_range =
   1.377  		vmalloc(sizeof(*privcmd_range) +
   1.378  			sizeof(privcmd_range->entries[0]) * num_entries);
   1.379 @@ -574,6 +689,18 @@ privcmd_mmap(struct file * file, struct 
   1.380  	if (privcmd_vma == NULL) {
   1.381  		goto out_enomem1;
   1.382  	}
   1.383 +	res = kzalloc(sizeof(*res), GFP_KERNEL);
   1.384 +	if (res == NULL) {
   1.385 +		goto out_enomem1;
   1.386 +	}
   1.387 +	res->name = "Xen privcmd mmap";
   1.388 +	error = allocate_resource(&iomem_resource, res, size,
   1.389 +				  privcmd_resource_min, privcmd_resource_max,
   1.390 +				  privcmd_resource_align, NULL, NULL);
   1.391 +	if (error) {
   1.392 +		goto out_enomem1;
   1.393 +	}
   1.394 +	privcmd_range->res = res;
   1.395  
   1.396  	/* DONTCOPY is essential for Xen as copy_page_range is broken. */
   1.397  	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
   1.398 @@ -589,10 +716,11 @@ privcmd_mmap(struct file * file, struct 
   1.399  	return 0;
   1.400  
   1.401  out_enomem1:
   1.402 +	kfree(res);
   1.403  	kfree(privcmd_vma);
   1.404  out_enomem0:
   1.405  	vfree(privcmd_range);
   1.406 -	return -ENOMEM;
   1.407 +	return error;
   1.408  }
   1.409  
   1.410  int
   1.411 @@ -605,6 +733,9 @@ direct_remap_pfn_range(struct vm_area_st
   1.412  {
   1.413  	struct xen_ia64_privcmd_vma* privcmd_vma =
   1.414  		(struct xen_ia64_privcmd_vma*)vma->vm_private_data;
   1.415 +	struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
   1.416 +	unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
   1.417 +
   1.418  	unsigned long i;
   1.419  	unsigned long offset;
   1.420  	int error = 0;
   1.421 @@ -618,9 +749,7 @@ direct_remap_pfn_range(struct vm_area_st
   1.422  
   1.423  	i = (address - vma->vm_start) >> PAGE_SHIFT;
   1.424  	for (offset = 0; offset < size; offset += PAGE_SIZE) {
   1.425 -		struct xen_ia64_privcmd_entry* entry =
   1.426 -			&privcmd_vma->entries[i];
   1.427 -		error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, entry, mfn, prot, domid);
   1.428 +		error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, privcmd_range, entry_offset + i, mfn, prot, domid);
   1.429  		if (error != 0) {
   1.430  			break;
   1.431  		}