ia64/xen-unstable

view extras/mini-os/blkfront.c @ 16995:0672d5b11262

minios: Fix blkfront grant map leak

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Feb 07 09:16:20 2008 +0000 (2008-02-07)
parents c9844192c965
children a905c582a406
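
For context: blkfront_aio() below grants the backend access to every page of the I/O buffer via gnttab_grant_access(). The grant map leak mentioned in the changeset summary presumably concerns those per-segment grants; in the file as shown, each one is released once its response is consumed in blkfront_aio_poll(), i.e. the pattern is:

    for (j = 0; j < aiocbp->n; j++)
        gnttab_end_access(aiocbp->gref[j]);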
line source

/* Minimal block driver for Mini-OS.
 * Copyright (c) 2007-2008 Samuel Thibault.
 * Based on netfront.c.
 */

#include <os.h>
#include <xenbus.h>
#include <events.h>
#include <errno.h>
#include <xen/io/blkif.h>
#include <gnttab.h>
#include <xmalloc.h>
#include <time.h>
#include <blkfront.h>
#include <lib.h>
#include <fcntl.h>

/* Note: we generally don't need to disable IRQs since we hardly do anything in
 * the interrupt handler. */

/* Note: we really assume non-preemptive threads. */

DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);

#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
#define GRANT_INVALID_REF 0

struct blk_buffer {
    void* page;
    grant_ref_t gref;
};

struct blkfront_dev {
    domid_t dom;

    struct blkif_front_ring ring;
    grant_ref_t ring_ref;
    evtchn_port_t evtchn, local_port;
    blkif_vdev_t handle;

    char *nodename;
    char *backend;
    unsigned sector_size;
    unsigned sectors;
    int mode;
    int barrier;
    int flush;
};

static inline int xenblk_rxidx(RING_IDX idx)
{
    return idx & (BLK_RING_SIZE - 1);
}

void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
{
    wake_up(&blkfront_queue);
}

struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode)
{
    xenbus_transaction_t xbt;
    char* err;
    char* message=NULL;
    struct blkif_sring *s;
    int retry=0;
    char* msg;
    char* c;

    struct blkfront_dev *dev;

    if (!nodename)
        nodename = "device/vbd/768";

    char path[strlen(nodename) + 1 + 10 + 1];

    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);

    dev = malloc(sizeof(*dev));
    dev->nodename = strdup(nodename);

    /* Allocate an unbound event channel toward the backend domain and bind
     * a local handler to it. */
    evtchn_alloc_unbound_t op;
    op.dom = DOMID_SELF;
    snprintf(path, sizeof(path), "%s/backend-id", nodename);
    dev->dom = op.remote_dom = xenbus_read_integer(path);
    HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
    clear_evtchn(op.port);        /* Without this, the handler gets invoked right away! */
    dev->local_port = bind_evtchn(op.port, blkfront_handler, dev);
    dev->evtchn=op.port;

    /* Allocate and share the request/response ring. */
    s = (struct blkif_sring*) alloc_page();
    memset(s,0,PAGE_SIZE);

    SHARED_RING_INIT(s);
    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);

    dev->ring_ref = gnttab_grant_access(dev->dom,virt_to_mfn(s),0);

    // FIXME: proper frees on failures
again:
    err = xenbus_transaction_start(&xbt);
    if (err) {
        printk("error starting transaction\n");
    }

    err = xenbus_printf(xbt, nodename, "ring-ref","%u",
                dev->ring_ref);
    if (err) {
        message = "writing ring-ref";
        goto abort_transaction;
    }
    err = xenbus_printf(xbt, nodename,
                "event-channel", "%u", dev->evtchn);
    if (err) {
        message = "writing event-channel";
        goto abort_transaction;
    }

    err = xenbus_printf(xbt, nodename, "state", "%u",
            4); /* connected */
    if (err) {
        message = "writing state";
        goto abort_transaction;
    }

    err = xenbus_transaction_end(xbt, 0, &retry);
    if (retry) {
        goto again;
    }
    printk("completing transaction\n");

    goto done;

abort_transaction:
    printk("Abort transaction: %s\n", message);
    xenbus_transaction_end(xbt, 1, &retry);
    return NULL;

done:

    snprintf(path, sizeof(path), "%s/backend", nodename);
    msg = xenbus_read(XBT_NIL, path, &dev->backend);
    if (msg) {
        printk("Error %s when reading the backend path %s\n", msg, path);
        return NULL;
    }

    printk("backend at %s\n", dev->backend);

    dev->handle = simple_strtoul(strrchr(nodename, '/')+1, NULL, 0);

    {
        char path[strlen(dev->backend) + 1 + 19 + 1];
        snprintf(path, sizeof(path), "%s/mode", dev->backend);
        msg = xenbus_read(XBT_NIL, path, &c);
        if (msg) {
            printk("Error %s when reading the mode\n", msg);
            return NULL;
        }
        if (*c == 'w')
            *mode = dev->mode = O_RDWR;
        else
            *mode = dev->mode = O_RDONLY;
        free(c);

        /* Wait for the backend to reach state 4 (connected). */
        snprintf(path, sizeof(path), "%s/state", dev->backend);

        xenbus_watch_path(XBT_NIL, path);

        xenbus_wait_for_value(path,"4");

        xenbus_unwatch_path(XBT_NIL, path);

        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
        // FIXME: read_integer returns an int, so disk size is limited to 1TB for now
        *sectors = dev->sectors = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
        *sector_size = dev->sector_size = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
        dev->barrier = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
        dev->flush = xenbus_read_integer(path);
    }

    printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
    printk("**************************\n");

    return dev;
}

void shutdown_blkfront(struct blkfront_dev *dev)
{
    char* err;
    char *nodename = dev->nodename;

    char path[strlen(dev->backend) + 1 + 5 + 1];

    blkfront_sync(dev);

    printk("close blk: backend at %s\n",dev->backend);

    snprintf(path, sizeof(path), "%s/state", dev->backend);
    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
    xenbus_wait_for_value(path,"5");

    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); /* closed */
    xenbus_wait_for_value(path,"6");

    unbind_evtchn(dev->local_port);

    free(nodename);
    free(dev->backend);
    free(dev);
}

static void blkfront_wait_slot(struct blkfront_dev *dev)
{
    /* Wait for a slot */
    if (RING_FULL(&dev->ring)) {
        unsigned long flags;
        DEFINE_WAIT(w);
        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);
            if (!RING_FULL(&dev->ring))
                break;
            /* Really no slot, go to sleep. */
            add_waiter(w, blkfront_queue);
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }
}

/* Issue an aio */
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    struct blkif_request *req;
    RING_IDX i;
    int notify;
    int n, j;
    uintptr_t start, end;

    // Can't do I/O at a non-sector-aligned location
    ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
    // Can't do I/O in non-sector-sized amounts
    ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
    // Can't do I/O from a non-sector-aligned buffer
    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));

    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
    aiocbp->n = n = (end - start) / PAGE_SIZE;

    /* qemu's IDE max multsect is 16 (8KB) and its SCSI max DMA was set to 32KB,
     * while BLKIF_MAX_SEGMENTS_PER_REQUEST (11) segments of PAGE_SIZE (4KB) each
     * would allow up to 44KB per request, so the limit cannot be hit. */
    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);

    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
    req->nr_segments = n;
    req->handle = dev->handle;
    req->id = (uintptr_t) aiocbp;
    req->sector_number = aiocbp->aio_offset / dev->sector_size;

    /* Grant the backend access to each page of the buffer. */
    for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
        if (!write) {
            /* Trigger CoW if needed */
            *(char*)data = 0;
            barrier();
        }
        aiocbp->gref[j] = req->seg[j].gref =
            gnttab_grant_access(dev->dom, virtual_to_mfn(data), write);
        req->seg[j].first_sect = 0;
        req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
    }
    /* The first and last pages may be covered only partially by the buffer. */
    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;

    dev->ring.req_prod_pvt = i + 1;

    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);

    if(notify) notify_remote_via_evtchn(dev->evtchn);
}

void blkfront_aio_write(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 1);
}

void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 0);
}

int blkfront_aio_poll(struct blkfront_dev *dev)
{
    RING_IDX rp, cons;
    struct blkif_response *rsp;

moretodo:
    rp = dev->ring.sring->rsp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */
    cons = dev->ring.rsp_cons;

    int nr_consumed = 0;
    while ((cons != rp))
    {
        rsp = RING_GET_RESPONSE(&dev->ring, cons);

        switch (rsp->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        {
            struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
            int j;
            /* Release the per-segment grants taken in blkfront_aio(). */
            for (j = 0; j < aiocbp->n; j++)
                gnttab_end_access(aiocbp->gref[j]);

            /* Note: the callback frees aiocbp itself */
            aiocbp->aio_cb(aiocbp, rsp->status ? -EIO : 0);
            break;
        }
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
            break;
        default:
            printk("unrecognized block operation %d response\n", rsp->operation);
            break;
        }

        nr_consumed++;
        ++cons;
    }
    dev->ring.rsp_cons = cons;

    int more;
    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
    if (more) goto moretodo;

    return nr_consumed;
}

/* Push a barrier or flush-cache operation, which carries no data segments. */
static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
{
    int i;
    struct blkif_request *req;
    int notify;

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);
    req->operation = op;
    req->nr_segments = 0;    /* don't let the backend see a stale segment count */
    dev->ring.req_prod_pvt = i + 1;
    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
    if (notify) notify_remote_via_evtchn(dev->evtchn);
}

void blkfront_sync(struct blkfront_dev *dev)
{
    unsigned long flags;

    if (dev->barrier == 1)
        blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);

    if (dev->flush == 1)
        blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);

    /* Wait until the request ring is empty again.
     * Note: This won't finish if another thread enqueues requests. */
    local_irq_save(flags);
    DEFINE_WAIT(w);
    while (1) {
        blkfront_aio_poll(dev);
        if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}
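
For reference, here is a minimal, hypothetical caller sketch (not part of this file). It assumes a Mini-OS application context; the example() and my_read_done() names are illustrative, the aiocb field names follow their use in the code above, and error handling is omitted.

/* Hypothetical example of driving the blkfront interface above. */
static void my_read_done(struct blkfront_aiocb *aiocb, int ret)
{
    /* ret is 0 on success, -EIO on error (see blkfront_aio_poll). */
    printk("read finished: %d\n", ret);
}

static void example(void)
{
    struct blkfront_dev *dev;
    uint64_t sectors;
    unsigned sector_size;
    int mode;
    struct blkfront_aiocb aiocb;

    /* Open the default "device/vbd/768" node. */
    dev = init_blkfront(NULL, &sectors, &sector_size, &mode);

    /* Read the first sector into a page-aligned (hence sector-aligned) buffer.
     * The driver doesn't touch the aiocb after the callback, so a
     * stack-allocated aiocb is fine here. */
    memset(&aiocb, 0, sizeof(aiocb));
    aiocb.aio_dev = dev;
    aiocb.aio_buf = (void*) alloc_page();
    aiocb.aio_nbytes = sector_size;
    aiocb.aio_offset = 0;
    aiocb.aio_cb = my_read_done;
    blkfront_aio_read(&aiocb);

    /* Busy-poll until the response has been consumed; a real application
     * would sleep on blkfront_queue instead. */
    while (blkfront_aio_poll(dev) == 0)
        schedule();

    blkfront_sync(dev);
    shutdown_blkfront(dev);
}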