ia64/xen-unstable

changeset 13069:1b6354023e64

[XENFB] xenfb_update_screen() calls zap_page_range() while holding spinlock mm_lock.

Changeset 13018:c98ca86138a7422cdf9b15d87c95619b7277bb6a merely sweeps
the bug under the carpet: it silences zap_page_range()'s cries for
help by keeping interrupts enabled. That doesn't fix the bug, and
it's also wrong: if a critical region gets interrupted, and the
interrupt printk()s, xenfb_refresh() gets executed and promptly
deadlocks.
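
To make the failure concrete, here is the old xenfb_update_screen() locking, condensed (the xenfb_* names and the zap_page_range() call are the driver's; the body is abridged, not verbatim source):

	/* Abridged sketch of the pre-patch code path.  mm_lock is still a
	 * spinlock here, and changeset 13018 takes it with interrupts
	 * left enabled. */
	static void xenfb_update_screen_old(struct xenfb_info *info)
	{
		struct xenfb_mapping *map;

		spin_lock(&info->mm_lock);	/* irqs remain enabled */
		/*
		 * An interrupt taken here whose handler printk()s re-enters
		 * the driver through the framebuffer console:
		 *
		 *   irq -> printk() -> fb console -> xenfb_refresh()
		 *            -> spin_lock(&info->mm_lock)   => deadlock
		 *
		 * Disabling interrupts instead would not help:
		 * zap_page_range() may sleep, which is invalid under a
		 * spinlock in the first place.
		 */
		list_for_each_entry(map, &info->mappings, link) {
			if (!map->faults)
				continue;
			zap_page_range(map->vma, map->vma->vm_start,
				       map->vma->vm_end - map->vma->vm_start,
				       NULL);
			map->faults = 0;
		}
		spin_unlock(&info->mm_lock);
	}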

This patch fixes the locking, but leaves open a race between
xenfb_update_screen() and do_no_page(). See the source code for a
detailed explanation of how it works, and where it fails.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
author kfraser@localhost.localdomain
date Fri Dec 15 17:29:25 2006 +0000 (2006-12-15)
parents 2afb01ec2197
children 96b047d22ad5
files linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c
--- a/linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c	Fri Dec 15 17:27:57 2006 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c	Fri Dec 15 17:29:25 2006 +0000
@@ -49,8 +49,9 @@ struct xenfb_info
 	struct timer_list	refresh;
 	int			dirty;
 	int			x1, y1, x2, y2;	/* dirty rectangle,
-						   protected by mm_lock */
-	spinlock_t		mm_lock;
+						   protected by dirty_lock */
+	spinlock_t		dirty_lock;
+	struct mutex		mm_lock;
 	int			nr_pages;
 	struct page		**pages;
 	struct list_head	mappings; /* protected by mm_lock */
@@ -64,6 +65,70 @@ struct xenfb_info
 	struct xenbus_device	*xbdev;
 };
 
+/*
+ * How the locks work together
+ *
+ * There are two locks: spinlock dirty_lock protecting the dirty
+ * rectangle, and mutex mm_lock protecting mappings.
+ *
+ * The problem is that the dirty rectangle and mappings aren't
+ * independent: the dirty rectangle must cover all faulted pages in
+ * mappings.  We need to prove that our locking maintains this
+ * invariant.
+ *
+ * There are several kinds of critical regions:
+ *
+ * 1. Holding only dirty_lock: xenfb_refresh().  May run in
+ *    interrupt context.  Extends the dirty rectangle.  Trivially
+ *    preserves invariant.
+ *
+ * 2. Holding only mm_lock: xenfb_mmap() and xenfb_vm_close().  Touch
+ *    only mappings.  The former creates unfaulted pages.  Preserves
+ *    invariant.  The latter removes pages.  Preserves invariant.
+ *
+ * 3. Holding both locks: xenfb_vm_nopage().  Extends the dirty
+ *    rectangle and updates mappings consistently.  Preserves
+ *    invariant.
+ *
+ * 4. The ugliest one: xenfb_update_screen().  Clears the dirty
+ *    rectangle and updates mappings consistently.
+ *
+ *    We can't simply hold both locks, because zap_page_range() cannot
+ *    be called with a spinlock held.
+ *
+ *    Therefore, we first clear the dirty rectangle with both locks
+ *    held.  Then we unlock dirty_lock and update the mappings.
+ *    Critical regions that hold only dirty_lock may interfere with
+ *    that.  This can only be region 1: xenfb_refresh().  But that
+ *    just extends the dirty rectangle, which can't harm the
+ *    invariant.
+ *
+ * But FIXME: the invariant is too weak.  It misses that the fault
+ * record in mappings must be consistent with the mapping of pages in
+ * the associated address space!  do_no_page() updates the PTE after
+ * xenfb_vm_nopage() returns, i.e. outside the critical region.  This
+ * allows the following race:
+ *
+ * X writes to some address in the Xen frame buffer
+ * Fault - call do_no_page()
+ *     call xenfb_vm_nopage()
+ *         grab mm_lock
+ *         map->faults++;
+ *         release mm_lock
+ *     return back to do_no_page()
+ * (preempted, or SMP)
+ * Xen worker thread runs.
+ *      grab mm_lock
+ *      look at mappings
+ *          find this mapping, zap its pages (but page not in PTE yet)
+ *          clear map->faults
+ *      release mm_lock
+ * (back to X process)
+ *     put page in X's PTE
+ *
+ * Oh well, we won't send updates for writes to this page anytime soon.
+ */
+
 static int xenfb_fps = 20;
 static unsigned long xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8;
 
@@ -105,6 +170,7 @@ static int xenfb_queue_full(struct xenfb
 
 static void xenfb_update_screen(struct xenfb_info *info)
 {
+	unsigned long flags;
 	int y1, y2, x1, x2;
 	struct xenfb_mapping *map;
 
@@ -113,14 +179,16 @@ static void xenfb_update_screen(struct x
 	if (xenfb_queue_full(info))
 		return;
 
-	spin_lock(&info->mm_lock);
+	mutex_lock(&info->mm_lock);
 
+	spin_lock_irqsave(&info->dirty_lock, flags);
 	y1 = info->y1;
 	y2 = info->y2;
 	x1 = info->x1;
 	x2 = info->x2;
 	info->x1 = info->y1 = INT_MAX;
 	info->x2 = info->y2 = 0;
+	spin_unlock_irqrestore(&info->dirty_lock, flags);
 
 	list_for_each_entry(map, &info->mappings, link) {
 		if (!map->faults)
@@ -130,7 +198,7 @@ static void xenfb_update_screen(struct x
 		map->faults = 0;
 	}
 
-	spin_unlock(&info->mm_lock);
+	mutex_unlock(&info->mm_lock);
 
 	xenfb_do_update(info, x1, y1, x2 - x1, y2 - y1);
 }
@@ -213,9 +281,11 @@ static void __xenfb_refresh(struct xenfb
 static void xenfb_refresh(struct xenfb_info *info,
 			  int x1, int y1, int w, int h)
 {
-	spin_lock(&info->mm_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&info->dirty_lock, flags);
 	__xenfb_refresh(info, x1, y1, w, h);
-	spin_unlock(&info->mm_lock);
+	spin_unlock_irqrestore(&info->dirty_lock, flags);
 }
 
 static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
@@ -253,12 +323,12 @@ static void xenfb_vm_close(struct vm_are
 	struct xenfb_mapping *map = vma->vm_private_data;
 	struct xenfb_info *info = map->info;
 
-	spin_lock(&info->mm_lock);
+	mutex_lock(&info->mm_lock);
 	if (atomic_dec_and_test(&map->map_refs)) {
 		list_del(&map->link);
 		kfree(map);
 	}
-	spin_unlock(&info->mm_lock);
+	mutex_unlock(&info->mm_lock);
 }
 
 static struct page *xenfb_vm_nopage(struct vm_area_struct *vma,
@@ -267,13 +337,15 @@ static struct page *xenfb_vm_nopage(stru
 	struct xenfb_mapping *map = vma->vm_private_data;
 	struct xenfb_info *info = map->info;
 	int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long flags;
 	struct page *page;
 	int y1, y2;
 
 	if (pgnr >= info->nr_pages)
 		return NOPAGE_SIGBUS;
 
-	spin_lock(&info->mm_lock);
+	mutex_lock(&info->mm_lock);
+	spin_lock_irqsave(&info->dirty_lock, flags);
 	page = info->pages[pgnr];
 	get_page(page);
 	map->faults++;
@@ -283,7 +355,8 @@ static struct page *xenfb_vm_nopage(stru
 	if (y2 > info->fb_info->var.yres)
 		y2 = info->fb_info->var.yres;
 	__xenfb_refresh(info, 0, y1, info->fb_info->var.xres, y2 - y1);
-	spin_unlock(&info->mm_lock);
+	spin_unlock_irqrestore(&info->dirty_lock, flags);
+	mutex_unlock(&info->mm_lock);
 
 	if (type)
 		*type = VM_FAULT_MINOR;
@@ -323,9 +396,9 @@ static int xenfb_mmap(struct fb_info *fb
 	map->info = info;
 	atomic_set(&map->map_refs, 1);
 
-	spin_lock(&info->mm_lock);
+	mutex_lock(&info->mm_lock);
 	list_add(&map->link, &info->mappings);
-	spin_unlock(&info->mm_lock);
+	mutex_unlock(&info->mm_lock);
 
 	vma->vm_ops = &xenfb_vm_ops;
 	vma->vm_flags |= (VM_DONTEXPAND | VM_RESERVED);
@@ -382,7 +455,8 @@ static int __devinit xenfb_probe(struct 
 	info->xbdev = dev;
 	info->irq = -1;
 	info->x1 = info->y1 = INT_MAX;
-	spin_lock_init(&info->mm_lock);
+	spin_lock_init(&info->dirty_lock);
+	mutex_init(&info->mm_lock);
 	init_waitqueue_head(&info->wq);
 	init_timer(&info->refresh);
 	info->refresh.function = xenfb_timer;
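
For quick reference, the locking discipline the patch establishes, condensed from the hunks above (a sketch, not verbatim source): mm_lock becomes the outer mutex, dirty_lock the inner irq-safe spinlock, and the sleeping zap_page_range() call runs with only the mutex held:

	/* xenfb_update_screen() after the patch, condensed */
	unsigned long flags;
	struct xenfb_mapping *map;
	int y1, y2, x1, x2;

	mutex_lock(&info->mm_lock);	/* outer: protects mappings, may sleep */
	spin_lock_irqsave(&info->dirty_lock, flags);	/* inner: dirty rect */
	y1 = info->y1; y2 = info->y2; x1 = info->x1; x2 = info->x2;
	info->x1 = info->y1 = INT_MAX;
	info->x2 = info->y2 = 0;
	spin_unlock_irqrestore(&info->dirty_lock, flags);

	list_for_each_entry(map, &info->mappings, link) {
		if (!map->faults)
			continue;
		zap_page_range(map->vma, map->vma->vm_start,
			       map->vma->vm_end - map->vma->vm_start,
			       NULL);	/* may sleep: only the mutex is held */
		map->faults = 0;
	}
	mutex_unlock(&info->mm_lock);

	xenfb_do_update(info, x1, y1, x2 - x1, y2 - y1);

Any path that can run in interrupt context (xenfb_refresh()) takes only dirty_lock, so it can never block on the mutex.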