ia64/xen-unstable

changeset 2152:92b8c987ae83

bitkeeper revision 1.1159.1.27 (41180722W5BkKRn14WoivvfF9m_vHw)

vmalloc fix for XL2.4
author kaf24@scramble.cl.cam.ac.uk
date Mon Aug 09 23:22:10 2004 +0000 (2004-08-09)
parents 0b883cd8b325
children d000e4ddb729 e5f6ec915c89
files .rootkeys linux-2.4.26-xen-sparse/mm/vmalloc.c
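
The change pulls a copy of the 2.4.26 mm/vmalloc.c into the Xen sparse tree for the XenoLinux 2.4 port (presumably what "XL2.4" refers to). Apart from that, the file tracks the stock kernel source; the functional fix is in __vmalloc_area_pages(), which now calls XEN_flush_page_update_queue() once the kernel page tables for the new area have been populated, so that queued page-table writes reach the hypervisor before the new mapping is used. The relevant path, excerpted from the diff below with an explanatory comment added:

	spin_unlock(&init_mm.page_table_lock);
	flush_cache_all();
	XEN_flush_page_update_queue();	/* push queued PTE updates to the hypervisor */
	return 0;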
line diff
     1.1 --- a/.rootkeys	Mon Aug 09 21:59:34 2004 +0000
     1.2 +++ b/.rootkeys	Mon Aug 09 23:22:10 2004 +0000
     1.3 @@ -123,6 +123,7 @@ 3f108af5VxPkLv13tXpXgoRKALQtXQ linux-2.4
     1.4  3e5a4e681xMPdF9xCMwpyfuYMySU5g linux-2.4.26-xen-sparse/mm/mremap.c
     1.5  409ba2e7akOFqQUg6Qyg2s28xcXiMg linux-2.4.26-xen-sparse/mm/page_alloc.c
     1.6  3e5a4e683HKVU-sxtagrDasRB8eBVw linux-2.4.26-xen-sparse/mm/swapfile.c
     1.7 +41180721bNns9Na7w1nJ0ZVt8bhUNA linux-2.4.26-xen-sparse/mm/vmalloc.c
     1.8  40f562372u3A7_kfbYYixPHJJxYUxA linux-2.6.7-xen-sparse/arch/xen/Kconfig
     1.9  40f56237utH41NPukqHksuNf29IC9A linux-2.6.7-xen-sparse/arch/xen/Kconfig.drivers
    1.10  40f56237penAAlWVBVDpeQZNFIg8CA linux-2.6.7-xen-sparse/arch/xen/Makefile
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/linux-2.4.26-xen-sparse/mm/vmalloc.c	Mon Aug 09 23:22:10 2004 +0000
     2.3 @@ -0,0 +1,385 @@
     2.4 +/*
     2.5 + *  linux/mm/vmalloc.c
     2.6 + *
     2.7 + *  Copyright (C) 1993  Linus Torvalds
     2.8 + *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
     2.9 + *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
    2.10 + */
    2.11 +
    2.12 +#include <linux/config.h>
    2.13 +#include <linux/slab.h>
    2.14 +#include <linux/vmalloc.h>
    2.15 +#include <linux/spinlock.h>
    2.16 +#include <linux/highmem.h>
    2.17 +#include <linux/smp_lock.h>
    2.18 +
    2.19 +#include <asm/uaccess.h>
    2.20 +#include <asm/pgalloc.h>
    2.21 +
    2.22 +rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
    2.23 +struct vm_struct * vmlist;
    2.24 +
    2.25 +static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned long size)
    2.26 +{
    2.27 +	pte_t * pte;
    2.28 +	unsigned long end;
    2.29 +
    2.30 +	if (pmd_none(*pmd))
    2.31 +		return;
    2.32 +	if (pmd_bad(*pmd)) {
    2.33 +		pmd_ERROR(*pmd);
    2.34 +		pmd_clear(pmd);
    2.35 +		return;
    2.36 +	}
    2.37 +	pte = pte_offset(pmd, address);
    2.38 +	address &= ~PMD_MASK;
    2.39 +	end = address + size;
    2.40 +	if (end > PMD_SIZE)
    2.41 +		end = PMD_SIZE;
    2.42 +	do {
    2.43 +		pte_t page;
    2.44 +		page = ptep_get_and_clear(pte);
    2.45 +		address += PAGE_SIZE;
    2.46 +		pte++;
    2.47 +		if (pte_none(page))
    2.48 +			continue;
    2.49 +		if (pte_present(page)) {
    2.50 +			struct page *ptpage = pte_page(page);
    2.51 +			if (VALID_PAGE(ptpage) && (!PageReserved(ptpage)))
    2.52 +				__free_page(ptpage);
    2.53 +			continue;
    2.54 +		}
    2.55 +		printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
    2.56 +	} while (address < end);
    2.57 +}
    2.58 +
    2.59 +static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned long size)
    2.60 +{
    2.61 +	pmd_t * pmd;
    2.62 +	unsigned long end;
    2.63 +
    2.64 +	if (pgd_none(*dir))
    2.65 +		return;
    2.66 +	if (pgd_bad(*dir)) {
    2.67 +		pgd_ERROR(*dir);
    2.68 +		pgd_clear(dir);
    2.69 +		return;
    2.70 +	}
    2.71 +	pmd = pmd_offset(dir, address);
    2.72 +	address &= ~PGDIR_MASK;
    2.73 +	end = address + size;
    2.74 +	if (end > PGDIR_SIZE)
    2.75 +		end = PGDIR_SIZE;
    2.76 +	do {
    2.77 +		free_area_pte(pmd, address, end - address);
    2.78 +		address = (address + PMD_SIZE) & PMD_MASK;
    2.79 +		pmd++;
    2.80 +	} while (address < end);
    2.81 +}
    2.82 +
    2.83 +void vmfree_area_pages(unsigned long address, unsigned long size)
    2.84 +{
    2.85 +	pgd_t * dir;
    2.86 +	unsigned long end = address + size;
    2.87 +
    2.88 +	dir = pgd_offset_k(address);
    2.89 +	flush_cache_all();
    2.90 +	do {
    2.91 +		free_area_pmd(dir, address, end - address);
    2.92 +		address = (address + PGDIR_SIZE) & PGDIR_MASK;
    2.93 +		dir++;
    2.94 +	} while (address && (address < end));
    2.95 +	flush_tlb_all();
    2.96 +}
    2.97 +
    2.98 +static inline int alloc_area_pte (pte_t * pte, unsigned long address,
    2.99 +			unsigned long size, int gfp_mask,
   2.100 +			pgprot_t prot, struct page ***pages)
   2.101 +{
   2.102 +	unsigned long end;
   2.103 +
   2.104 +	address &= ~PMD_MASK;
   2.105 +	end = address + size;
   2.106 +	if (end > PMD_SIZE)
   2.107 +		end = PMD_SIZE;
   2.108 +	do {
   2.109 +		struct page * page;
   2.110 +
   2.111 +		if (!pages) {
   2.112 +			spin_unlock(&init_mm.page_table_lock);
   2.113 +			page = alloc_page(gfp_mask);
   2.114 +			spin_lock(&init_mm.page_table_lock);
   2.115 +		} else {
   2.116 +			page = (**pages);
   2.117 +			(*pages)++;
   2.118 +
   2.119 +			/* Add a reference to the page so we can free later */
   2.120 +			if (page)
   2.121 +				atomic_inc(&page->count);
   2.122 +
   2.123 +		}
   2.124 +		if (!pte_none(*pte))
   2.125 +			printk(KERN_ERR "alloc_area_pte: page already exists\n");
   2.126 +		if (!page)
   2.127 +			return -ENOMEM;
   2.128 +		set_pte(pte, mk_pte(page, prot));
   2.129 +		address += PAGE_SIZE;
   2.130 +		pte++;
   2.131 +	} while (address < end);
   2.132 +	return 0;
   2.133 +}
   2.134 +
   2.135 +static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address,
   2.136 +			unsigned long size, int gfp_mask,
   2.137 +			pgprot_t prot, struct page ***pages)
   2.138 +{
   2.139 +	unsigned long end;
   2.140 +
   2.141 +	address &= ~PGDIR_MASK;
   2.142 +	end = address + size;
   2.143 +	if (end > PGDIR_SIZE)
   2.144 +		end = PGDIR_SIZE;
   2.145 +	do {
   2.146 +		pte_t * pte = pte_alloc(&init_mm, pmd, address);
   2.147 +		if (!pte)
   2.148 +			return -ENOMEM;
   2.149 +		if (alloc_area_pte(pte, address, end - address,
   2.150 +					gfp_mask, prot, pages))
   2.151 +			return -ENOMEM;
   2.152 +		address = (address + PMD_SIZE) & PMD_MASK;
   2.153 +		pmd++;
   2.154 +	} while (address < end);
   2.155 +	return 0;
   2.156 +}
   2.157 +
   2.158 +static inline int __vmalloc_area_pages (unsigned long address,
   2.159 +					unsigned long size,
   2.160 +					int gfp_mask,
   2.161 +					pgprot_t prot,
   2.162 +					struct page ***pages)
   2.163 +{
   2.164 +	pgd_t * dir;
   2.165 +	unsigned long start = address;
   2.166 +	unsigned long end = address + size;
   2.167 +
   2.168 +	dir = pgd_offset_k(address);
   2.169 +	spin_lock(&init_mm.page_table_lock);
   2.170 +	do {
   2.171 +		pmd_t *pmd;
   2.172 +		
   2.173 +		pmd = pmd_alloc(&init_mm, dir, address);
   2.174 +		if (!pmd)
   2.175 +			goto err;
   2.176 +
   2.177 +		if (alloc_area_pmd(pmd, address, end - address, gfp_mask, prot, pages))
   2.178 +			goto err;	// The kernel NEVER reclaims pmds, so no need to undo pmd_alloc() here
   2.179 +
   2.180 +		address = (address + PGDIR_SIZE) & PGDIR_MASK;
   2.181 +		dir++;
   2.182 +	} while (address && (address < end));
   2.183 +	spin_unlock(&init_mm.page_table_lock);
   2.184 +	flush_cache_all();
   2.185 +	XEN_flush_page_update_queue();
   2.186 +	return 0;
   2.187 +err:
   2.188 +	spin_unlock(&init_mm.page_table_lock);
   2.189 +	flush_cache_all();
   2.190 +	if (address > start)
   2.191 +		vmfree_area_pages(start, address - start);
   2.192 +	return -ENOMEM;
   2.193 +}
   2.194 +
   2.195 +int vmalloc_area_pages(unsigned long address, unsigned long size,
   2.196 +		       int gfp_mask, pgprot_t prot)
   2.197 +{
   2.198 +	return __vmalloc_area_pages(address, size, gfp_mask, prot, NULL);
   2.199 +}
   2.200 +
   2.201 +struct vm_struct * get_vm_area(unsigned long size, unsigned long flags)
   2.202 +{
   2.203 +	unsigned long addr, next;
   2.204 +	struct vm_struct **p, *tmp, *area;
   2.205 +
   2.206 +	area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
   2.207 +	if (!area)
   2.208 +		return NULL;
   2.209 +
   2.210 +	size += PAGE_SIZE;
   2.211 +	if (!size) {
   2.212 +		kfree (area);
   2.213 +		return NULL;
   2.214 +	}
   2.215 +
   2.216 +	addr = VMALLOC_START;
   2.217 +	write_lock(&vmlist_lock);
   2.218 +	for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
   2.219 +		if ((size + addr) < addr)
   2.220 +			goto out;
   2.221 +		if (size + addr <= (unsigned long) tmp->addr)
   2.222 +			break;
   2.223 +		next = tmp->size + (unsigned long) tmp->addr;
   2.224 +		if (next > addr) 
   2.225 +			addr = next;
   2.226 +		if (addr > VMALLOC_END-size)
   2.227 +			goto out;
   2.228 +	}
   2.229 +	area->flags = flags;
   2.230 +	area->addr = (void *)addr;
   2.231 +	area->size = size;
   2.232 +	area->next = *p;
   2.233 +	*p = area;
   2.234 +	write_unlock(&vmlist_lock);
   2.235 +	return area;
   2.236 +
   2.237 +out:
   2.238 +	write_unlock(&vmlist_lock);
   2.239 +	kfree(area);
   2.240 +	return NULL;
   2.241 +}
   2.242 +
   2.243 +void __vfree(void * addr, int free_area_pages)
   2.244 +{
   2.245 +	struct vm_struct **p, *tmp;
   2.246 +
   2.247 +	if (!addr)
   2.248 +		return;
   2.249 +	if ((PAGE_SIZE-1) & (unsigned long) addr) {
   2.250 +		printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
   2.251 +		return;
   2.252 +	}
   2.253 +	write_lock(&vmlist_lock);
   2.254 +	for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) {
   2.255 +		if (tmp->addr == addr) {
   2.256 +			*p = tmp->next;
   2.257 +			if (free_area_pages)
   2.258 +				vmfree_area_pages(VMALLOC_VMADDR(tmp->addr), tmp->size);
   2.259 +			write_unlock(&vmlist_lock);
   2.260 +			kfree(tmp);
   2.261 +			return;
   2.262 +		}
   2.263 +	}
   2.264 +	write_unlock(&vmlist_lock);
   2.265 +	printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", addr);
   2.266 +}
   2.267 +
   2.268 +void vfree(void * addr)
   2.269 +{
   2.270 +	__vfree(addr,1);
   2.271 +}
   2.272 +
   2.273 +void * __vmalloc (unsigned long size, int gfp_mask, pgprot_t prot)
   2.274 +{
   2.275 +	void * addr;
   2.276 +	struct vm_struct *area;
   2.277 +
   2.278 +	size = PAGE_ALIGN(size);
   2.279 +	if (!size || (size >> PAGE_SHIFT) > num_physpages)
   2.280 +		return NULL;
   2.281 +	area = get_vm_area(size, VM_ALLOC);
   2.282 +	if (!area)
   2.283 +		return NULL;
   2.284 +	addr = area->addr;
   2.285 +	if (__vmalloc_area_pages(VMALLOC_VMADDR(addr), size, gfp_mask,
   2.286 +				 prot, NULL)) {
   2.287 +		__vfree(addr, 0);
   2.288 +		return NULL;
   2.289 +	}
   2.290 +	return addr;
   2.291 +}
   2.292 +
   2.293 +void * vmap(struct page **pages, int count,
   2.294 +	    unsigned long flags, pgprot_t prot)
   2.295 +{
   2.296 +	void * addr;
   2.297 +	struct vm_struct *area;
   2.298 +	unsigned long size = count << PAGE_SHIFT;
   2.299 +
   2.300 +	if (!size || size > (max_mapnr << PAGE_SHIFT))
   2.301 +		return NULL;
   2.302 +	area = get_vm_area(size, flags);
   2.303 +	if (!area) {
   2.304 +		return NULL;
   2.305 +	}
   2.306 +	addr = area->addr;
   2.307 +	if (__vmalloc_area_pages(VMALLOC_VMADDR(addr), size, 0,
   2.308 +				 prot, &pages)) {
   2.309 +		__vfree(addr, 0);
   2.310 +		return NULL;
   2.311 +	}
   2.312 +	return addr;
   2.313 +}
   2.314 +
   2.315 +long vread(char *buf, char *addr, unsigned long count)
   2.316 +{
   2.317 +	struct vm_struct *tmp;
   2.318 +	char *vaddr, *buf_start = buf;
   2.319 +	unsigned long n;
   2.320 +
   2.321 +	/* Don't allow overflow */
   2.322 +	if ((unsigned long) addr + count < count)
   2.323 +		count = -(unsigned long) addr;
   2.324 +
   2.325 +	read_lock(&vmlist_lock);
   2.326 +	for (tmp = vmlist; tmp; tmp = tmp->next) {
   2.327 +		vaddr = (char *) tmp->addr;
   2.328 +		if (addr >= vaddr + tmp->size - PAGE_SIZE)
   2.329 +			continue;
   2.330 +		while (addr < vaddr) {
   2.331 +			if (count == 0)
   2.332 +				goto finished;
   2.333 +			*buf = '\0';
   2.334 +			buf++;
   2.335 +			addr++;
   2.336 +			count--;
   2.337 +		}
   2.338 +		n = vaddr + tmp->size - PAGE_SIZE - addr;
   2.339 +		do {
   2.340 +			if (count == 0)
   2.341 +				goto finished;
   2.342 +			*buf = *addr;
   2.343 +			buf++;
   2.344 +			addr++;
   2.345 +			count--;
   2.346 +		} while (--n > 0);
   2.347 +	}
   2.348 +finished:
   2.349 +	read_unlock(&vmlist_lock);
   2.350 +	return buf - buf_start;
   2.351 +}
   2.352 +
   2.353 +long vwrite(char *buf, char *addr, unsigned long count)
   2.354 +{
   2.355 +	struct vm_struct *tmp;
   2.356 +	char *vaddr, *buf_start = buf;
   2.357 +	unsigned long n;
   2.358 +
   2.359 +	/* Don't allow overflow */
   2.360 +	if ((unsigned long) addr + count < count)
   2.361 +		count = -(unsigned long) addr;
   2.362 +
   2.363 +	read_lock(&vmlist_lock);
   2.364 +	for (tmp = vmlist; tmp; tmp = tmp->next) {
   2.365 +		vaddr = (char *) tmp->addr;
   2.366 +		if (addr >= vaddr + tmp->size - PAGE_SIZE)
   2.367 +			continue;
   2.368 +		while (addr < vaddr) {
   2.369 +			if (count == 0)
   2.370 +				goto finished;
   2.371 +			buf++;
   2.372 +			addr++;
   2.373 +			count--;
   2.374 +		}
   2.375 +		n = vaddr + tmp->size - PAGE_SIZE - addr;
   2.376 +		do {
   2.377 +			if (count == 0)
   2.378 +				goto finished;
   2.379 +			*addr = *buf;
   2.380 +			buf++;
   2.381 +			addr++;
   2.382 +			count--;
   2.383 +		} while (--n > 0);
   2.384 +	}
   2.385 +finished:
   2.386 +	read_unlock(&vmlist_lock);
   2.387 +	return buf - buf_start;
   2.388 +}
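
For context, a minimal sketch of how a 2.4 module would exercise the interface above; vmalloc() is the usual header wrapper around the __vmalloc() defined here, and the names and buffer size in this sketch are illustrative, not part of the changeset:

	#include <linux/vmalloc.h>
	#include <linux/string.h>

	static void *example_buf;

	static int example_setup(void)
	{
		/* Allocate 64 KB of virtually contiguous, page-backed memory. */
		example_buf = vmalloc(64 * 1024);
		if (!example_buf)
			return -ENOMEM;
		memset(example_buf, 0, 64 * 1024);
		return 0;
	}

	static void example_teardown(void)
	{
		/* Removes the vm area and frees the backing pages (__vfree(addr, 1)). */
		vfree(example_buf);
	}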