From 93dab19774c4493fcbec429569493837cb83213d Mon Sep 17 00:00:00 2001 From: t_jeang Date: Tue, 6 Jan 2009 12:06:06 +0000 Subject: [PATCH] Add extra debugging code to find out what's going on when unaccounted space is used in swap. --- mm/shmem.c | 107 +++++++++++++++++++++ mm/swapfile.c | 252 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 359 insertions(+) diff --git a/mm/shmem.c b/mm/shmem.c index 2106dd71..1f11aad1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -890,6 +891,112 @@ int shmem_unuse(swp_entry_t entry, struct page *page) return found; } +#ifdef CONFIG_PROC_FS +static int debug_si_shmem_inode(struct seq_file *swap, struct shmem_inode_info *info, swp_entry_t entry) +{ + struct inode *inode; + unsigned long idx; + unsigned long size; + unsigned long limit; + unsigned long stage; + struct page **dir; + struct page *subdir; + swp_entry_t *ptr; + int offset; + int major, partno; + + idx = 0; + ptr = info->i_direct; + spin_lock(&info->lock); + limit = info->next_index; + size = limit; + if (size > SHMEM_NR_DIRECT) + size = SHMEM_NR_DIRECT; + offset = shmem_find_swp(entry, ptr, ptr+size); + if (offset >= 0) { + shmem_swp_balance_unmap(); + goto found; + } + if (!info->i_indirect) + goto lost2; + + dir = shmem_dir_map(info->i_indirect); + stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; + + for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { + if (unlikely(idx == stage)) { + shmem_dir_unmap(dir-1); + dir = shmem_dir_map(info->i_indirect) + + ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; + while (!*dir) { + dir++; + idx += ENTRIES_PER_PAGEPAGE; + if (idx >= limit) + goto lost1; + } + stage = idx + ENTRIES_PER_PAGEPAGE; + subdir = *dir; + shmem_dir_unmap(dir); + dir = shmem_dir_map(subdir); + } + subdir = *dir; + if (subdir && page_private(subdir)) { + ptr = shmem_swp_map(subdir); + size = limit - idx; + if (size > ENTRIES_PER_PAGE) + size = ENTRIES_PER_PAGE; + offset = shmem_find_swp(entry, ptr, ptr+size); + if (offset >= 0) { + shmem_dir_unmap(dir); + goto found; + } + shmem_swp_unmap(ptr); + } + } +lost1: + shmem_dir_unmap(dir-1); +lost2: + spin_unlock(&info->lock); + return 0; +found: + idx += offset; + inode = &info->vfs_inode; + major = 0; + partno = 0; + if (inode->i_sb->s_bdev) { + if (inode->i_sb->s_bdev->bd_disk) + major = inode->i_sb->s_bdev->bd_disk->major; + if (inode->i_sb->s_bdev->bd_part) + partno = inode->i_sb->s_bdev->bd_part->partno; + } + seq_printf(swap, "entry %lx from shmem mapping of inode %lx/%ld on %x:%x/%.32s\n", + entry.val, inode->i_ino, inode->i_ino, major, partno, inode->i_sb->s_id); + shmem_swp_unmap(ptr); + spin_unlock(&info->lock); + return 1; +} + +int debug_si_shmem(struct seq_file *swap, swp_entry_t entry) +{ + struct list_head *p, *next; + struct shmem_inode_info *info; + int found = 0; + + spin_lock(&shmem_swaplist_lock); + list_for_each_safe(p, next, &shmem_swaplist) { + info = list_entry(p, struct shmem_inode_info, swaplist); + if (!info->swapped) + continue; + if (debug_si_shmem_inode(swap, info, entry)) { + found = 1; + break; + } + } + spin_unlock(&shmem_swaplist_lock); + return found; +} +#endif + /* * Move the page from the page cache to the swap cache. */ diff --git a/mm/swapfile.c b/mm/swapfile.c index 02dad6be..40cfd0ff 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1264,7 +1264,237 @@ out: return err; } + #ifdef CONFIG_PROC_FS +static struct task_struct *get_task_for_mm(struct mm_struct *mm) +{ + struct task_struct *p; + + for_each_process(p) { + if (p->mm == mm) + break; + } + if (p == &init_task) + p = NULL; + return p; +} + +static int debug_si_pte_range(struct seq_file *swap, + struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr, unsigned long end, + swp_entry_t entry) +{ + pte_t swp_pte = swp_entry_to_pte(entry); + pte_t *pte; + spinlock_t *ptl; + int found = 0; + + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + do { + /* + * swapoff spends a _lot_ of time in this loop! + * Test inline before going to call unuse_pte. + */ + if (pte_same(*pte, swp_pte)) { + struct task_struct *task = get_task_for_mm(vma->vm_mm); + seq_printf(swap, + "entry %lx is in vma %p of process %d/%.16s at 0x%08lx\n", + entry.val, vma, task ? task->pid : -1, + task ? task->comm : "", addr); + found = 1; + break; + } + } while (pte++, addr += PAGE_SIZE, addr != end); + pte_unmap_unlock(pte - 1, ptl); + return found; +} + +static inline int debug_si_pmd_range(struct seq_file *swap, + struct vm_area_struct *vma, pud_t *pud, + unsigned long addr, unsigned long end, + swp_entry_t entry) +{ + pmd_t *pmd; + unsigned long next; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none_or_clear_bad(pmd)) + continue; + if (debug_si_pte_range(swap, vma, pmd, addr, next, entry)) + return 1; + } while (pmd++, addr = next, addr != end); + return 0; +} + +static inline int debug_si_pud_range(struct seq_file *swap, + struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, + swp_entry_t entry) +{ + pud_t *pud; + unsigned long next; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + if (debug_si_pmd_range(swap, vma, pud, addr, next, entry)) + return 1; + } while (pud++, addr = next, addr != end); + return 0; +} + +static int debug_si_vma(struct seq_file *swap, + struct vm_area_struct *vma, + swp_entry_t entry) +{ + pgd_t *pgd; + unsigned long addr, end, next; + + addr = vma->vm_start; + end = vma->vm_end; + + pgd = pgd_offset(vma->vm_mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + if (debug_si_pud_range(swap, vma, pgd, addr, next, entry)) + return 1; + } while (pgd++, addr = next, addr != end); + return 0; +} + +static int debug_si_mm(struct seq_file *swap, + struct mm_struct *mm, + swp_entry_t entry) +{ + struct vm_area_struct *vma; + + if (!down_read_trylock(&mm->mmap_sem)) { + /* + * Activate page so shrink_cache is unlikely to unmap its + * ptes while lock is dropped, so swapoff can make progress. + */ + down_read(&mm->mmap_sem); + } + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (vma->anon_vma && debug_si_vma(swap, vma, entry)) + break; + } + up_read(&mm->mmap_sem); + /* + * Currently unuse_mm cannot fail, but leave error handling + * at call sites for now, since we change it from time to time. + */ + return vma ? 1 : 0; +} + +static void *swapdebug_start(struct seq_file *swap, loff_t *pos) +{ + struct swap_info_struct *ptr = swap_info; + int i; + loff_t l = *pos; + + mutex_lock(&swapon_mutex); + + swap->private = (void *)0; + + for (i = 0; i < nr_swapfiles; i++, ptr++) { + if (!(ptr->flags & SWP_USED) || !ptr->swap_map) + continue; + if (!l--) + return ptr; + } + + return NULL; +} + +static void *swapdebug_next(struct seq_file *swap, void *v, loff_t *pos) +{ + struct swap_info_struct *ptr = v; + struct swap_info_struct *endptr = swap_info + nr_swapfiles; + int idx = (int)swap->private; + + if (idx != -1) { + swap->private = (void *)(idx + 1); + return ptr; + } + + for (++ptr; ptr < endptr; ptr++) { + if (!(ptr->flags & SWP_USED) || !ptr->swap_map) + continue; + ++*pos; + return ptr; + } + + return NULL; +} + +static void swapdebug_stop(struct seq_file *swap, void *v) +{ + mutex_unlock(&swapon_mutex); +} + +int swapdebug_show(struct seq_file *swap, void *ptr) +{ + struct swap_info_struct *si = ptr; + unsigned short *swap_map; + int i, type; + int count, found; + swp_entry_t entry; + struct mm_struct *prev_mm, *mm; + struct list_head *pmm; + extern int debug_si_shmem(struct seq_file *swap, swp_entry_t entry); + + spin_lock(&swap_lock); + for (type = swap_list.head; type >= 0; type = swap_info[type].next) { + if (si == swap_info + type) + break; + } + spin_unlock(&swap_lock); + if (type < 0) { + seq_printf(swap, "swap info %d doesn't exit\n", type); + return 0; + } + + i = (int)swap->private; + count = si->swap_map[i]; + if (!count || count == SWAP_MAP_BAD) + goto out; + swap_map = &si->swap_map[i]; + entry = swp_entry(type, i); + count = *swap_map; + seq_printf(swap, "%x/%lx count %d\n", i, entry.val, count); + prev_mm = &init_mm; + atomic_inc(&prev_mm->mm_users); + found = 0; + if (debug_si_shmem(swap, entry)) + found++; + pmm = &prev_mm->mmlist; + spin_lock(&mmlist_lock); + while (found < count && (pmm = pmm->next) != &init_mm.mmlist) { + mm = list_entry(pmm, struct mm_struct, mmlist); + if (!atomic_inc_not_zero(&mm->mm_users)) + continue; + spin_unlock(&mmlist_lock); + mmput(prev_mm); + prev_mm = mm; + if (debug_si_mm(swap, mm, entry)) + found++; + spin_lock(&mmlist_lock); + } + spin_unlock(&mmlist_lock); + mmput(prev_mm); +out: + if (i == si->max - 1) + swap->private = (void *)-1; + return 0; +} + /* iterator */ static void *swap_start(struct seq_file *swap, loff_t *pos) { @@ -1344,6 +1574,25 @@ static struct file_operations proc_swaps_operations = { .release = seq_release, }; +static struct seq_operations swapdebug_op = { + .start = swapdebug_start, + .next = swapdebug_next, + .stop = swapdebug_stop, + .show = swapdebug_show +}; + +static int swapdebug_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &swapdebug_op); +} + +static struct file_operations proc_swapdebug_operations = { + .open = swapdebug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static int __init procswaps_init(void) { struct proc_dir_entry *entry; @@ -1351,6 +1600,9 @@ static int __init procswaps_init(void) entry = create_proc_entry("swaps", 0, NULL); if (entry) entry->proc_fops = &proc_swaps_operations; + entry = create_proc_entry("swapdebug", 0, NULL); + if (entry) + entry->proc_fops = &proc_swapdebug_operations; return 0; } __initcall(procswaps_init); -- 2.39.5