ia64/xen-unstable

view extras/mini-os/blkfront.c @ 17790:49a0d28a16e0

minios: add a blkfront synchronous interface

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>

author    Keir Fraser <keir.fraser@citrix.com>
date      Thu Jun 05 13:03:31 2008 +0100
parents   c6533d246a81
children  2197a263a300
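The synchronous interface added by this changeset is blkfront_io(), which wraps the asynchronous blkfront_aio() and blocks until the response has been consumed. A minimal usage sketch follows (not part of this file; the function name, the 512-byte sector size, and the buffer handling are assumptions for illustration):

    /* Hypothetical example -- assumes a 512-byte sector size and relies on
     * the default "device/vbd/768" node that init_blkfront() falls back to
     * when nodename is NULL. */
    #include <blkfront.h>

    static uint8_t buf[512] __attribute__((aligned(512))); /* must be sector-aligned */

    void example_sync_read(void)
    {
        struct blkfront_info info;
        struct blkfront_dev *dev = init_blkfront(NULL, &info);
        struct blkfront_aiocb aiocb = {
            .aio_dev    = dev,
            .aio_buf    = buf,
            .aio_nbytes = info.sector_size,   /* one sector */
            .aio_offset = 0,
        };

        /* blkfront_io() issues the request and polls the ring until the
         * response arrives, so buf is filled when it returns. */
        blkfront_io(&aiocb, 0);               /* write=0: a read */
        shutdown_blkfront(dev);
    }
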
/* Minimal block driver for Mini-OS.
 * Copyright (c) 2007-2008 Samuel Thibault.
 * Based on netfront.c.
 */

#include <os.h>
#include <xenbus.h>
#include <events.h>
#include <errno.h>
#include <xen/io/blkif.h>
#include <gnttab.h>
#include <xmalloc.h>
#include <time.h>
#include <blkfront.h>
#include <lib.h>
#include <fcntl.h>

#ifndef HAVE_LIBC
#define strtoul simple_strtoul
#endif

/* Note: we generally don't need to disable IRQs since we hardly do anything in
 * the interrupt handler. */

/* Note: we really assume non-preemptive threads. */

DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);

#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
#define GRANT_INVALID_REF 0

struct blk_buffer {
    void* page;
    grant_ref_t gref;
};

struct blkfront_dev {
    domid_t dom;

    struct blkif_front_ring ring;
    grant_ref_t ring_ref;
    evtchn_port_t evtchn;
    blkif_vdev_t handle;

    char *nodename;
    char *backend;
    struct blkfront_info info;

    xenbus_event_queue events;

#ifdef HAVE_LIBC
    int fd;
#endif
};

void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
{
#ifdef HAVE_LIBC
    struct blkfront_dev *dev = data;
    int fd = dev->fd;

    files[fd].read = 1;
#endif
    wake_up(&blkfront_queue);
}

static void free_blkfront(struct blkfront_dev *dev)
{
    mask_evtchn(dev->evtchn);

    free(dev->backend);

    gnttab_end_access(dev->ring_ref);
    free_page(dev->ring.sring);

    unbind_evtchn(dev->evtchn);

    free(dev->nodename);
    free(dev);
}

struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info)
{
    xenbus_transaction_t xbt;
    char* err;
    char* message=NULL;
    struct blkif_sring *s;
    int retry=0;
    char* msg;
    char* c;

    struct blkfront_dev *dev;

    if (!nodename)
        nodename = "device/vbd/768";

    char path[strlen(nodename) + 1 + 10 + 1];

    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);

    dev = malloc(sizeof(*dev));
    memset(dev, 0, sizeof(*dev));
    dev->nodename = strdup(nodename);

    snprintf(path, sizeof(path), "%s/backend-id", nodename);
    dev->dom = xenbus_read_integer(path);
    evtchn_alloc_unbound(dev->dom, blkfront_handler, dev, &dev->evtchn);

    s = (struct blkif_sring*) alloc_page();
    memset(s, 0, PAGE_SIZE);

    SHARED_RING_INIT(s);
    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);

    dev->ring_ref = gnttab_grant_access(dev->dom, virt_to_mfn(s), 0);

    dev->events = NULL;

    // FIXME: proper frees on failures
again:
    err = xenbus_transaction_start(&xbt);
    if (err) {
        printk("starting transaction\n");
    }

    err = xenbus_printf(xbt, nodename, "ring-ref", "%u",
                dev->ring_ref);
    if (err) {
        message = "writing ring-ref";
        goto abort_transaction;
    }
    err = xenbus_printf(xbt, nodename,
                "event-channel", "%u", dev->evtchn);
    if (err) {
        message = "writing event-channel";
        goto abort_transaction;
    }

    err = xenbus_printf(xbt, nodename, "state", "%u",
                4); /* connected */
    if (err) {
        message = "switching state";
        goto abort_transaction;
    }

    err = xenbus_transaction_end(xbt, 0, &retry);
    if (retry) {
        printk("completing transaction\n");
        goto again;
    }

    goto done;

abort_transaction:
    xenbus_transaction_end(xbt, 1, &retry);
    goto error;

done:

    snprintf(path, sizeof(path), "%s/backend", nodename);
    msg = xenbus_read(XBT_NIL, path, &dev->backend);
    if (msg) {
        printk("Error %s when reading the backend path %s\n", msg, path);
        goto error;
    }

    printk("backend at %s\n", dev->backend);

    dev->handle = strtoul(strrchr(nodename, '/')+1, NULL, 0);

    {
        char path[strlen(dev->backend) + 1 + 19 + 1];
        snprintf(path, sizeof(path), "%s/mode", dev->backend);
        msg = xenbus_read(XBT_NIL, path, &c);
        if (msg) {
            printk("Error %s when reading the mode\n", msg);
            goto error;
        }
        if (*c == 'w')
            dev->info.mode = O_RDWR;
        else
            dev->info.mode = O_RDONLY;
        free(c);

        snprintf(path, sizeof(path), "%s/state", dev->backend);

        xenbus_watch_path_token(XBT_NIL, path, path, &dev->events);

        xenbus_wait_for_value(path, "4", &dev->events);

        snprintf(path, sizeof(path), "%s/info", dev->backend);
        dev->info.info = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
        // FIXME: read_integer returns an int, so disk size limited to 1TB for now
        dev->info.sectors = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
        dev->info.sector_size = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
        dev->info.barrier = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
        dev->info.flush = xenbus_read_integer(path);

        *info = dev->info;
    }
    unmask_evtchn(dev->evtchn);

    printk("%u sectors of %u bytes\n", dev->info.sectors, dev->info.sector_size);
    printk("**************************\n");

    return dev;

error:
    free_blkfront(dev);
    return NULL;
}

void shutdown_blkfront(struct blkfront_dev *dev)
{
    char* err;
    char *nodename = dev->nodename;

    char path[strlen(dev->backend) + 1 + 5 + 1];

    blkfront_sync(dev);

    printk("close blk: backend at %s\n", dev->backend);

    snprintf(path, sizeof(path), "%s/state", dev->backend);
    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
    xenbus_wait_for_value(path, "5", &dev->events);

    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); /* closed */
    xenbus_wait_for_value(path, "6", &dev->events);

    xenbus_unwatch_path(XBT_NIL, path);

    free_blkfront(dev);
}

static void blkfront_wait_slot(struct blkfront_dev *dev)
{
    /* Wait for a slot */
    if (RING_FULL(&dev->ring)) {
        unsigned long flags;
        DEFINE_WAIT(w);
        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);
            if (!RING_FULL(&dev->ring))
                break;
            /* Really no slot, go to sleep. */
            add_waiter(w, blkfront_queue);
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }
}

/* Issue an aio */
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    struct blkif_request *req;
    RING_IDX i;
    int notify;
    int n, j;
    uintptr_t start, end;

    // Can't do I/O at a non-sector-aligned location
    ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1)));
    // Can't do I/O in non-sector-sized amounts
    ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1)));
    // Can't do I/O to a non-sector-aligned buffer
    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1)));

    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
    aiocbp->n = n = (end - start) / PAGE_SIZE;

    /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB,
     * so requests larger than 44KB (BLKIF_MAX_SEGMENTS_PER_REQUEST pages)
     * can't happen */
    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);

    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
    req->nr_segments = n;
    req->handle = dev->handle;
    req->id = (uintptr_t) aiocbp;
    req->sector_number = aiocbp->aio_offset / dev->info.sector_size;

    for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
        if (!write) {
            /* Trigger CoW if needed */
            *(char*)data = 0;
            barrier();
        }
        aiocbp->gref[j] = req->seg[j].gref =
            gnttab_grant_access(dev->dom, virt_to_mfn(data), write);
        req->seg[j].first_sect = 0;
        req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1;
    }
    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size;
    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size;

    dev->ring.req_prod_pvt = i + 1;

    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);

    if (notify) notify_remote_via_evtchn(dev->evtchn);
}

static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret)
{
    aiocbp->data = (void*) 1;
}

void blkfront_io(struct blkfront_aiocb *aiocbp, int write)
{
    unsigned long flags;
    ASSERT(!aiocbp->aio_cb);
    aiocbp->aio_cb = blkfront_aio_cb;
    /* Clear the completion flag before issuing the request, so that it
     * cannot clobber the flag once the callback has set it. */
    aiocbp->data = NULL;
    blkfront_aio(aiocbp, write);

    local_irq_save(flags);
    DEFINE_WAIT(w);
    while (1) {
        blkfront_aio_poll(aiocbp->aio_dev);
        if (aiocbp->data)
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}

static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id)
{
    int i;
    struct blkif_request *req;
    int notify;

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);
    req->operation = op;
    req->nr_segments = 0;
    req->handle = dev->handle;
    req->id = id;
    /* Not needed anyway, but the backend will check it */
    req->sector_number = 0;
    dev->ring.req_prod_pvt = i + 1;
    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
    if (notify) notify_remote_via_evtchn(dev->evtchn);
}

void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    blkfront_push_operation(dev, op, (uintptr_t) aiocbp);
}

void blkfront_sync(struct blkfront_dev *dev)
{
    unsigned long flags;

    if (dev->info.mode == O_RDWR) {
        if (dev->info.barrier == 1)
            blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0);

        if (dev->info.flush == 1)
            blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0);
    }

    /* Note: This won't finish if another thread enqueues requests. */
    local_irq_save(flags);
    DEFINE_WAIT(w);
    while (1) {
        blkfront_aio_poll(dev);
        if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}

int blkfront_aio_poll(struct blkfront_dev *dev)
{
    RING_IDX rp, cons;
    struct blkif_response *rsp;
    int more;

moretodo:
#ifdef HAVE_LIBC
    files[dev->fd].read = 0;
    mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */
#endif

    rp = dev->ring.sring->rsp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */
    cons = dev->ring.rsp_cons;

    int nr_consumed = 0;
    while (cons != rp)
    {
        rsp = RING_GET_RESPONSE(&dev->ring, cons);
        nr_consumed++;

        struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
        int status = rsp->status;

        if (status != BLKIF_RSP_OKAY)
            printk("block error %d for op %d\n", status, rsp->operation);

        switch (rsp->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        {
            int j;

            for (j = 0; j < aiocbp->n; j++)
                gnttab_end_access(aiocbp->gref[j]);

            break;
        }

        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
            break;

        default:
            printk("unrecognized block operation %d response\n", rsp->operation);
        }

        dev->ring.rsp_cons = ++cons;
        /* Note: the callback frees aiocbp itself */
        if (aiocbp && aiocbp->aio_cb)
            aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
        if (dev->ring.rsp_cons != cons)
            /* We reentered, so we must not continue here */
            break;
    }

    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
    if (more) goto moretodo;

    return nr_consumed;
}

#ifdef HAVE_LIBC
int blkfront_open(struct blkfront_dev *dev)
{
    dev->fd = alloc_fd(FTYPE_BLK);
    printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd);
    files[dev->fd].blk.dev = dev;
    return dev->fd;
}
#endif