From: Matt Mackall
Date: Tue, 6 Jan 2009 12:06:06 +0000 (+0000)
Subject: This interface provides a mapping for each page in an address space to its
X-Git-Tag: maps2-patches/maps2-make-proc-pid-clear_refs-option-under-config_embedded.patch
X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=61535a060ac3b95d40ee8250039eecea2b780741;p=xenclient%2Fkernel.git

This interface provides a mapping for each page in an address space to its
physical page frame number, allowing precise determination of what pages are
mapped and what pages are shared between processes.

[akpm@linux-foundation.org: warning fix]
Signed-off-by: Matt Mackall
Cc: Jeremy Fitzhardinge
Cc: David Rientjes
Signed-off-by: Andrew Morton
---

 fs/proc/base.c     |    8 +
 fs/proc/internal.h |    2
 fs/proc/task_mmu.c |  209 +++++++++++++++++++++++++++++++++++++++++++
 init/Kconfig       |   10 ++
 4 files changed, 228 insertions(+), 1 deletion(-)

---

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 084724a6..50168dec 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -124,6 +124,9 @@ enum pid_directory_inos {
 #ifdef CONFIG_PROC_SMAPS
 	PROC_TGID_SMAPS,
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+	PROC_TGID_PAGEMAP,
+#endif
 #endif
 #ifdef CONFIG_SCHEDSTATS
 	PROC_TGID_SCHEDSTAT,
@@ -175,6 +178,9 @@ enum pid_directory_inos {
 #ifdef CONFIG_PROC_SMAPS
 	PROC_TID_SMAPS,
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+	PROC_TID_PAGEMAP,
+#endif
 #endif
 #ifdef CONFIG_SCHEDSTATS
 	PROC_TID_SCHEDSTAT,
@@ -243,6 +249,9 @@ static struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_PROC_SMAPS
 	E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUSR),
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+	E(PROC_TGID_PAGEMAP, "pagemap", S_IFREG|S_IRUSR),
+#endif
 #endif
 #ifdef CONFIG_SECURITY
 	E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
@@ -297,6 +306,9 @@ static struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_PROC_SMAPS
 	E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUSR),
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+	E(PROC_TID_PAGEMAP, "pagemap", S_IFREG|S_IRUSR),
+#endif
 #endif
 #ifdef CONFIG_SECURITY
 	E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
@@ -918,7 +930,7 @@ out_no_task:
 }
 #endif

-static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
+loff_t mem_lseek(struct file * file, loff_t offset, int orig)
 {
 	switch (orig) {
 	case 0:
@@ -2001,6 +2013,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
 			inode->i_fop = &proc_smaps_operations;
 			break;
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+		case PROC_TID_PAGEMAP:
+		case PROC_TGID_PAGEMAP:
+			inode->i_fop = &proc_pagemap_operations;
+			break;
+#endif
 #endif
 		case PROC_TID_MOUNTSTATS:
 		case PROC_TGID_MOUNTSTATS:
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 983edb76..6e9d3c4a 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -39,11 +39,13 @@ extern int proc_tgid_stat(struct task_struct *, char *);
 extern int proc_pid_status(struct task_struct *, char *);
 extern int proc_pid_statm(struct task_struct *, char *);
 extern int proc_pid_limits(struct task_struct *, char *);
+extern loff_t mem_lseek(struct file * file, loff_t offset, int orig);

 extern struct file_operations proc_maps_operations;
 extern struct file_operations proc_numa_maps_operations;
 extern struct file_operations proc_smaps_operations;
 extern struct file_operations proc_clear_refs_operations;
+extern struct file_operations proc_pagemap_operations;

 void free_proc_entry(struct proc_dir_entry *de);

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 55a29799..068348b1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 #include

 #include
@@ -541,3 +542,292 @@ struct file_operations proc_numa_maps_operations = {
 };
 #endif

+#ifdef CONFIG_PROC_PAGEMAP
+/*
+ * Cursor and staging state for a single pagemap_read() call.
+ *
+ * @mm:     mm of the task whose mappings are being reported
+ * @next:   first virtual address that has not been emitted yet
+ * @buf:    page-sized staging buffer of entries awaiting copy_to_user()
+ * @ptebuf: page-sized scratch copy of PTEs (allocated under CONFIG_HIGHPTE)
+ * @pos:    file position, advanced by every successful flush
+ * @count:  bytes the reader is still owed
+ * @index:  number of entries currently staged in @buf
+ * @out:    userspace destination of the next flush
+ */
+struct pagemapread {
+	struct mm_struct *mm;
+	unsigned long next;
+	unsigned long *buf;
+	pte_t *ptebuf;
+	unsigned long pos;
+	size_t count;
+	int index;
+	char __user *out;
+};
+
+/*
+ * Copy the staged entries to userspace, clamped to what the reader is
+ * still owed, then empty the staging buffer.  Returns 0 or -EFAULT.
+ */
+static int flush_pagemap(struct pagemapread *pm)
+{
+	int n = min(pm->count, pm->index * sizeof(unsigned long));
+
+	if (copy_to_user(pm->out, pm->buf, n))
+		return -EFAULT;
+	pm->out += n;
+	pm->pos += n;
+	pm->count -= n;
+	pm->index = 0;
+	cond_resched();
+	return 0;
+}
+
+/*
+ * Stage one entry for the page at @addr and advance the cursor past it.
+ * Flushes once a page's worth of entries, or everything the reader is
+ * still owed, has been staged.  Returns 0 or the flush error.
+ */
+static int add_to_pagemap(unsigned long addr, unsigned long pfn,
+			  struct pagemapread *pm)
+{
+	pm->buf[pm->index++] = pfn;
+	pm->next = addr + PAGE_SIZE;
+	if (pm->index * sizeof(unsigned long) >= PAGE_SIZE ||
+	    pm->index * sizeof(unsigned long) >= pm->count)
+		return flush_pagemap(pm);
+	return 0;
+}
+
+/*
+ * mm_walk pmd_entry callback: emit one entry per page in [@addr, @end),
+ * the PFN for present PTEs and -1 otherwise.  Pages below pm->next have
+ * already been emitted and are skipped.  Returns 0 or the flush error.
+ */
+static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			     void *private)
+{
+	struct pagemapread *pm = private;
+	pte_t *mapped, *pte;
+	int err = 0;
+
+	mapped = pte = pte_offset_map(pmd, addr);
+
+#ifdef CONFIG_HIGHPTE
+	/*
+	 * Copy from the first PTE to the end of its page and drop the
+	 * mapping.  The length is taken from the offset within the page:
+	 * PAGE_ALIGN() leaves an already aligned pointer unchanged, which
+	 * made the old length zero for a walk starting on the first PTE.
+	 */
+	memcpy(pm->ptebuf, pte,
+	       PAGE_SIZE - ((unsigned long)pte & ~PAGE_MASK));
+	pte_unmap(mapped);
+	pte = pm->ptebuf;
+#endif
+
+	for (; addr != end; pte++, addr += PAGE_SIZE) {
+		if (addr < pm->next)
+			continue;
+		if (!pte_present(*pte))
+			err = add_to_pagemap(addr, -1UL, pm);
+		else
+			err = add_to_pagemap(addr, pte_pfn(*pte), pm);
+		if (err)
+			break;
+	}
+
+#ifndef CONFIG_HIGHPTE
+	/* unmap what was mapped, on the error path too */
+	pte_unmap(mapped);
+#endif
+
+	return err;
+}
+
+/*
+ * Emit "not present" (-1) entries from pm->next up to, but excluding,
+ * @end.  Returns 0 or the flush error.
+ */
+static int pagemap_fill(struct pagemapread *pm, unsigned long end)
+{
+	int ret;
+
+	while (pm->next != end) {
+		ret = add_to_pagemap(pm->next, -1UL, pm);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static struct mm_walk pagemap_walk = { .pmd_entry = pagemap_pte_range };
+
+/*
+ * /proc/pid/pagemap - an array mapping virtual pages to pfns
+ *
+ * For each page in the address space, this file contains one long
+ * representing the corresponding physical page frame number (PFN) or
+ * -1 if the page isn't present. This allows determining precisely
+ * which pages are mapped and comparing mapped pages between
+ * processes.
+ *
+ * Efficient users of this interface will use /proc/pid/maps to
+ * determine which areas of memory are actually mapped and llseek to
+ * skip over unmapped regions.
+ *
+ * The first 4 bytes of this file form a simple header:
+ *
+ * first byte:   0 for big endian, 1 for little
+ * second byte:  page shift (eg 12 for 4096 byte pages)
+ * third byte:   entry size in bytes (currently either 4 or 8)
+ * fourth byte:  header size
+ *
+ * Reads must start on an entry boundary (-EIO otherwise) and return
+ * whole entries only, never more than @count bytes.  Returns the number
+ * of bytes copied, 0 at end of file or when the task has no mm, or a
+ * negative errno.
+ */
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+	/* pages in the address space; the file holds one entry more (header) */
+	const unsigned long nr_vpfns = ((~0UL) >> PAGE_SHIFT) + 1;
+	unsigned long src = *ppos;
+	unsigned long *page;
+	unsigned long addr, end, vend, svpfn, evpfn, first, nr;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	struct pagemapread pm;
+	int ret = -ESRCH;
+
+	if (!task)
+		goto out_no_task;
+
+	ret = -EACCES;
+	if (!ptrace_may_attach(task))
+		goto out;
+
+	/* file entry 0 is the header, entry i + 1 describes virtual page i */
+	ret = -EIO;
+	first = src / sizeof(unsigned long);
+	if (first * sizeof(unsigned long) != src)
+		goto out;
+	svpfn = first - 1;
+	addr = PAGE_SIZE * svpfn;
+
+	/*
+	 * Serve whole entries only, at most what the caller asked for and
+	 * nothing past the entry of the last page.  Deriving the end from
+	 * the entry count keeps the copy within the caller's buffer.
+	 */
+	ret = 0;
+	if (first > nr_vpfns)
+		goto out;
+	nr = count / sizeof(unsigned long);
+	if (nr > nr_vpfns - first + 1)
+		nr = nr_vpfns - first + 1;
+	if (!nr)
+		goto out;
+	evpfn = svpfn + nr;
+	count = nr * sizeof(unsigned long);
+	end = PAGE_SIZE * evpfn;
+
+	ret = -ENOMEM;
+	page = kzalloc(PAGE_SIZE, GFP_USER);
+	if (!page)
+		goto out;
+
+#ifdef CONFIG_HIGHPTE
+	pm.ptebuf = kzalloc(PAGE_SIZE, GFP_USER);
+	if (!pm.ptebuf)
+		goto out_free;
+#endif
+
+	ret = 0;
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out_freepte;
+
+	pm.mm = mm;
+	pm.next = addr;
+	pm.buf = page;
+	pm.pos = src;
+	pm.count = count;
+	pm.index = 0;
+	pm.out = buf;
+
+	if (svpfn == -1UL) {
+		/*
+		 * Fill the header in before staging it: staging flushes at
+		 * once when the reader asked for a single entry.
+		 */
+		unsigned long header = 0;
+		char *bytes = (char *)&header;
+
+		bytes[0] = (ntohl(1) != 1);
+		bytes[1] = PAGE_SHIFT;
+		bytes[2] = sizeof(unsigned long);
+		bytes[3] = sizeof(unsigned long);
+		ret = add_to_pagemap(pm.next, header, &pm);
+		if (ret)
+			goto out_mm;
+	}
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, pm.next);
+	while (pm.count > 0 && vma) {
+		/* leave through the common unlock below, never past it */
+		if (!ptrace_may_attach(task)) {
+			ret = -EIO;
+			break;
+		}
+		/*
+		 * Pad the hole in front of this vma.  A read that starts
+		 * inside the vma has no hole: pagemap_fill() only stops
+		 * on an exact match and must not be aimed behind pm.next.
+		 */
+		if (pm.next < vma->vm_start) {
+			vend = min(vma->vm_start - 1, end - 1) + 1;
+			ret = pagemap_fill(&pm, vend);
+			if (ret || !pm.count)
+				break;
+		}
+		vend = min(vma->vm_end - 1, end - 1) + 1;
+		ret = walk_page_range(mm, vma->vm_start, vend,
+				      &pagemap_walk, &pm);
+		if (ret)
+			break;
+		vma = vma->vm_next;
+	}
+	up_read(&mm->mmap_sem);
+
+	/* pad up to the end of the request, unless an error is pending */
+	if (!ret)
+		ret = pagemap_fill(&pm, end);
+
+	*ppos = pm.pos;
+	if (!ret)
+		ret = pm.pos - src;
+
+out_mm:
+	mmput(mm);
+out_freepte:
+#ifdef CONFIG_HIGHPTE
+	kfree(pm.ptebuf);
+out_free:
+#endif
+	kfree(page);
+out:
+	put_task_struct(task);
+out_no_task:
+	return ret;
+}
+
+struct file_operations proc_pagemap_operations = {
+	.llseek = mem_lseek, /* borrow this */
+	.read = pagemap_read,
+};
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index a4d0c126..63bc2c3f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -426,6 +426,16 @@ config PROC_CLEAR_REFS
 	  working set size. Disabling this interface will reduce the size
 	  of the kernel for small machines.

+config PROC_PAGEMAP
+	default y
+	bool "Enable /proc/pid/pagemap support" if EMBEDDED && PROC_FS && MMU
+	help
+	  The /proc/pid/pagemap interface allows reading the
+	  kernel's virtual memory to page frame mapping to determine which
+	  individual pages a process has mapped and which pages it shares
+	  with other processes. Disabling this interface will reduce the
+	  size of the kernel for small machines.
+
 endmenu		# General setup

 config RT_MUTEXES