ia64/linux-2.6.18-xen.hg

drivers/xen/blktap2/request.c @ 878:eba6fe6d8d53

blktap2: a completely rewritten blktap implementation

Benefits of blktap2 over the old version of blktap:

* Isolation from XenStore - Blktap devices are now created directly on
the Linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management: no
allocations on the block data path, and IO retry logic to protect
guests from transient block device failures. This has been tested
and is known to work in weird environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD
image format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, and blktap just presents
a Linux gendisk that blkback can export (a minimal gendisk sketch
follows the commit header below). This is done while preserving
the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>

Author: Keir Fraser <keir.fraser@citrix.com>
Date:   Tue May 26 11:23:16 2009 +0100
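
The last bullet in the commit message refers to presenting an ordinary
Linux gendisk. For orientation only, here is a minimal 2.6.18-style
sketch of what registering such a gendisk involves. This is not
blktap2's actual device code: the "tapdev" name, the tap_* identifiers,
and the request handler that simply fails I/O are all invented for
illustration.

#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/fs.h>

static int tap_major;
static struct gendisk *tap_disk;
static struct request_queue *tap_queue;
static DEFINE_SPINLOCK(tap_queue_lock);

static int tap_open(struct inode *inode, struct file *filp)
{
        return 0;
}

static int tap_release(struct inode *inode, struct file *filp)
{
        return 0;
}

static struct block_device_operations tap_fops = {
        .owner   = THIS_MODULE,
        .open    = tap_open,
        .release = tap_release,
};

static void tap_request_fn(request_queue_t *q)
{
        struct request *req;

        /* A real driver would hand these to its backend; this sketch
         * simply fails them. */
        while ((req = elv_next_request(q)) != NULL)
                end_request(req, 0);
}

static int __init tap_sketch_init(void)
{
        tap_major = register_blkdev(0, "tapdev");
        if (tap_major <= 0)
                return -ENODEV;

        tap_queue = blk_init_queue(tap_request_fn, &tap_queue_lock);
        if (!tap_queue)
                goto fail;

        tap_disk = alloc_disk(1);
        if (!tap_disk)
                goto fail;

        tap_disk->major       = tap_major;
        tap_disk->first_minor = 0;
        tap_disk->fops        = &tap_fops;
        tap_disk->queue       = tap_queue;
        sprintf(tap_disk->disk_name, "tapdev0");
        set_capacity(tap_disk, 0);  /* capacity set once a disk is attached */

        add_disk(tap_disk);
        return 0;

fail:
        if (tap_queue)
                blk_cleanup_queue(tap_queue);
        unregister_blkdev(tap_major, "tapdev");
        return -ENOMEM;
}

static void __exit tap_sketch_exit(void)
{
        del_gendisk(tap_disk);
        put_disk(tap_disk);
        blk_cleanup_queue(tap_queue);
        unregister_blkdev(tap_major, "tapdev");
}

module_init(tap_sketch_init);
module_exit(tap_sketch_exit);

In blktap2 itself, the gendisk created by the blktap module is what
blkback exports to guests, as the commit message describes; the sketch
above only covers the registration boilerplate.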
drivers/xen/blktap2/request.c:
#include <linux/spinlock.h>
#include <xen/balloon.h>

#include "blktap.h"

#define MAX_BUCKETS          8
#define BUCKET_SIZE          MAX_PENDING_REQS

#define BLKTAP_POOL_CLOSING  1

struct blktap_request_bucket;

struct blktap_request_handle {
        int slot;
        uint8_t inuse;
        struct blktap_request request;
        struct blktap_request_bucket *bucket;
};

struct blktap_request_bucket {
        atomic_t reqs_in_use;
        struct blktap_request_handle handles[BUCKET_SIZE];
        struct page **foreign_pages;
};

struct blktap_request_pool {
        spinlock_t lock;
        uint8_t status;
        struct list_head free_list;
        atomic_t reqs_in_use;
        wait_queue_head_t wait_queue;
        struct blktap_request_bucket *buckets[MAX_BUCKETS];
};
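
/*
 * Request objects are handed out from a single global pool.  The pool
 * holds up to MAX_BUCKETS buckets; each bucket carries BUCKET_SIZE
 * request handles plus a vector of pre-allocated empty pages
 * (foreign_pages) backing the data segments of those requests -- see
 * request_to_kaddr() below.  Free requests from all buckets are chained
 * on pool.free_list.
 */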
static struct blktap_request_pool pool;

static inline struct blktap_request_handle *
blktap_request_to_handle(struct blktap_request *req)
{
        return container_of(req, struct blktap_request_handle, request);
}

static void
blktap_request_pool_init_request(struct blktap_request *request)
{
        int i;

        request->usr_idx = -1;
        request->nr_pages = 0;
        request->status = BLKTAP_REQUEST_FREE;
        INIT_LIST_HEAD(&request->free_list);
        for (i = 0; i < ARRAY_SIZE(request->handles); i++) {
                request->handles[i].user = INVALID_GRANT_HANDLE;
                request->handles[i].kernel = INVALID_GRANT_HANDLE;
        }
}
static int
blktap_request_pool_allocate_bucket(void)
{
        int i, idx;
        unsigned long flags;
        struct blktap_request *request;
        struct blktap_request_handle *handle;
        struct blktap_request_bucket *bucket;

        bucket = kzalloc(sizeof(struct blktap_request_bucket), GFP_KERNEL);
        if (!bucket)
                goto fail;

        bucket->foreign_pages = alloc_empty_pages_and_pagevec(MMAP_PAGES);
        if (!bucket->foreign_pages)
                goto fail;

        spin_lock_irqsave(&pool.lock, flags);

        idx = -1;
        for (i = 0; i < MAX_BUCKETS; i++) {
                if (!pool.buckets[i]) {
                        idx = i;
                        pool.buckets[idx] = bucket;
                        break;
                }
        }

        if (idx == -1) {
                spin_unlock_irqrestore(&pool.lock, flags);
                goto fail;
        }

        for (i = 0; i < BUCKET_SIZE; i++) {
                handle = bucket->handles + i;
                request = &handle->request;

                handle->slot = i;
                handle->inuse = 0;
                handle->bucket = bucket;

                blktap_request_pool_init_request(request);
                list_add_tail(&request->free_list, &pool.free_list);
        }

        spin_unlock_irqrestore(&pool.lock, flags);

        return 0;

fail:
        if (bucket && bucket->foreign_pages)
                free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES);
        kfree(bucket);
        return -ENOMEM;
}
static void
blktap_request_pool_free_bucket(struct blktap_request_bucket *bucket)
{
        if (!bucket)
                return;

        BTDBG("freeing bucket %p\n", bucket);

        free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES);
        kfree(bucket);
}

unsigned long
request_to_kaddr(struct blktap_request *req, int seg)
{
        struct blktap_request_handle *handle = blktap_request_to_handle(req);
        int idx = handle->slot * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
        unsigned long pfn = page_to_pfn(handle->bucket->foreign_pages[idx]);
        return (unsigned long)pfn_to_kaddr(pfn);
}
int
blktap_request_pool_shrink(void)
{
        int i, err;
        unsigned long flags;
        struct blktap_request_bucket *bucket;

        err = -EAGAIN;

        spin_lock_irqsave(&pool.lock, flags);

        /* always keep at least one bucket */
        for (i = 1; i < MAX_BUCKETS; i++) {
                bucket = pool.buckets[i];
                if (!bucket)
                        continue;

                if (atomic_read(&bucket->reqs_in_use))
                        continue;

                blktap_request_pool_free_bucket(bucket);
                pool.buckets[i] = NULL;
                err = 0;
                break;
        }

        spin_unlock_irqrestore(&pool.lock, flags);

        return err;
}

int
blktap_request_pool_grow(void)
{
        return blktap_request_pool_allocate_bucket();
}
struct blktap_request *
blktap_request_allocate(struct blktap *tap)
{
        int i;
        uint16_t usr_idx;
        unsigned long flags;
        struct blktap_request *request;

        usr_idx = -1;
        request = NULL;

        spin_lock_irqsave(&pool.lock, flags);

        if (pool.status == BLKTAP_POOL_CLOSING)
                goto out;

        for (i = 0; i < ARRAY_SIZE(tap->pending_requests); i++)
                if (!tap->pending_requests[i]) {
                        usr_idx = i;
                        break;
                }

        if (usr_idx == (uint16_t)-1)
                goto out;

        if (!list_empty(&pool.free_list)) {
                request = list_entry(pool.free_list.next,
                                     struct blktap_request, free_list);
                list_del(&request->free_list);
        }

        if (request) {
                struct blktap_request_handle *handle;

                atomic_inc(&pool.reqs_in_use);

                handle = blktap_request_to_handle(request);
                atomic_inc(&handle->bucket->reqs_in_use);
                handle->inuse = 1;

                request->usr_idx = usr_idx;

                tap->pending_requests[usr_idx] = request;
                tap->pending_cnt++;
        }

out:
        spin_unlock_irqrestore(&pool.lock, flags);
        return request;
}
void
blktap_request_free(struct blktap *tap, struct blktap_request *request)
{
        int free;
        unsigned long flags;
        struct blktap_request_handle *handle;

        BUG_ON(request->usr_idx >= ARRAY_SIZE(tap->pending_requests));
        handle = blktap_request_to_handle(request);

        spin_lock_irqsave(&pool.lock, flags);

        handle->inuse = 0;
        tap->pending_requests[request->usr_idx] = NULL;
        blktap_request_pool_init_request(request);
        list_add(&request->free_list, &pool.free_list);
        atomic_dec(&handle->bucket->reqs_in_use);
        free = atomic_dec_and_test(&pool.reqs_in_use);

        spin_unlock_irqrestore(&pool.lock, flags);

        if (--tap->pending_cnt == 0)
                wake_up_interruptible(&tap->wq);

        if (free)
                wake_up(&pool.wait_queue);
}
void
blktap_request_pool_free(void)
{
        int i;
        unsigned long flags;

        spin_lock_irqsave(&pool.lock, flags);

        pool.status = BLKTAP_POOL_CLOSING;
        while (atomic_read(&pool.reqs_in_use)) {
                spin_unlock_irqrestore(&pool.lock, flags);
                wait_event(pool.wait_queue, !atomic_read(&pool.reqs_in_use));
                spin_lock_irqsave(&pool.lock, flags);
        }

        for (i = 0; i < MAX_BUCKETS; i++) {
                blktap_request_pool_free_bucket(pool.buckets[i]);
                pool.buckets[i] = NULL;
        }

        spin_unlock_irqrestore(&pool.lock, flags);
}
int
blktap_request_pool_init(void)
{
        int i, err;

        memset(&pool, 0, sizeof(pool));

        spin_lock_init(&pool.lock);
        INIT_LIST_HEAD(&pool.free_list);
        atomic_set(&pool.reqs_in_use, 0);
        init_waitqueue_head(&pool.wait_queue);

        for (i = 0; i < 2; i++) {
                err = blktap_request_pool_allocate_bucket();
                if (err)
                        goto fail;
        }

        return 0;

fail:
        blktap_request_pool_free();
        return err;
}
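
For orientation, the following hedged sketch shows how the interface
above is meant to be driven. The caller blktap_example_io() and its seg
argument are hypothetical; the pool functions, request_to_kaddr(), and
struct blktap are the ones defined in this file and blktap.h.

#include <linux/errno.h>
#include "blktap.h"

/*
 * Hypothetical caller.  blktap_request_pool_init() is assumed to have run
 * at module load time, and blktap_request_pool_free() runs at unload (the
 * latter blocks until every request has been returned).  This only
 * sketches the per-I/O flow.
 */
int blktap_example_io(struct blktap *tap, int seg)
{
        struct blktap_request *request;
        unsigned long kaddr;

        request = blktap_request_allocate(tap);
        if (!request) {
                /* No free slot or free request: add a bucket, retry once. */
                if (blktap_request_pool_grow())
                        return -ENOMEM;
                request = blktap_request_allocate(tap);
                if (!request)
                        return -EBUSY;
        }

        /*
         * Each request slot owns BLKIF_MAX_SEGMENTS_PER_REQUEST
         * pre-allocated pages; request_to_kaddr() locates the page backing
         * segment 'seg' before data is mapped into it.
         */
        kaddr = request_to_kaddr(request, seg);
        (void)kaddr;

        /* On completion the request goes back to the pool; the tap's
         * pending count drops and any waiters are woken. */
        blktap_request_free(tap, request);
        return 0;
}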