extras/mini-os/blkfront.c @ 19836:d9890e67d2b7

/* Minimal block driver for Mini-OS.
 * Copyright (c) 2007-2008 Samuel Thibault.
 * Based on netfront.c.
 */

#include <os.h>
#include <xenbus.h>
#include <events.h>
#include <errno.h>
#include <xen/io/blkif.h>
#include <xen/io/protocols.h>
#include <gnttab.h>
#include <xmalloc.h>
#include <time.h>
#include <blkfront.h>
#include <lib.h>
#include <fcntl.h>

#ifndef HAVE_LIBC
#define strtoul simple_strtoul
#endif

/* Note: we generally don't need to disable IRQs since we hardly do anything in
 * the interrupt handler. */

/* Note: we really assume non-preemptive threads. */

DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);

#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
#define GRANT_INVALID_REF 0

struct blk_buffer {
    void* page;
    grant_ref_t gref;
};

struct blkfront_dev {
    domid_t dom;

    struct blkif_front_ring ring;
    grant_ref_t ring_ref;
    evtchn_port_t evtchn;
    blkif_vdev_t handle;

    char *nodename;
    char *backend;
    struct blkfront_info info;

    xenbus_event_queue events;

#ifdef HAVE_LIBC
    int fd;
#endif
};
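
/* Event-channel interrupt handler: when built with libc support, flag the
 * device's file descriptor as readable, then wake any thread sleeping on
 * blkfront_queue. */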
void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
{
#ifdef HAVE_LIBC
    struct blkfront_dev *dev = data;
    int fd = dev->fd;

    if (fd != -1)
        files[fd].read = 1;
#endif
    wake_up(&blkfront_queue);
}
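
/* Tear down a device: mask and unbind its event channel, revoke the grant on
 * the shared ring, and free the ring page, the xenstore strings, and the
 * device structure itself. */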
static void free_blkfront(struct blkfront_dev *dev)
{
    mask_evtchn(dev->evtchn);

    free(dev->backend);

    gnttab_end_access(dev->ring_ref);
    free_page(dev->ring.sring);

    unbind_evtchn(dev->evtchn);

    free(dev->nodename);
    free(dev);
}
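
/* Connect to the block backend for the given xenstore node (defaults to
 * "device/vbd/768"): allocate and grant the shared ring, bind the event
 * channel, publish ring-ref/event-channel/protocol in a xenbus transaction,
 * wait for the backend to reach XenbusStateConnected, and read the disk
 * geometry and feature flags into *info. Returns NULL on failure. */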
struct blkfront_dev *init_blkfront(char *_nodename, struct blkfront_info *info)
{
    xenbus_transaction_t xbt;
    char* err;
    char* message=NULL;
    struct blkif_sring *s;
    int retry=0;
    char* msg;
    char* c;
    char* nodename = _nodename ? _nodename : "device/vbd/768";

    struct blkfront_dev *dev;

    char path[strlen(nodename) + 1 + 10 + 1];

    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);

    dev = malloc(sizeof(*dev));
    memset(dev, 0, sizeof(*dev));
    dev->nodename = strdup(nodename);
#ifdef HAVE_LIBC
    dev->fd = -1;
#endif

    snprintf(path, sizeof(path), "%s/backend-id", nodename);
    dev->dom = xenbus_read_integer(path);
    evtchn_alloc_unbound(dev->dom, blkfront_handler, dev, &dev->evtchn);

    s = (struct blkif_sring*) alloc_page();
    memset(s, 0, PAGE_SIZE);

    SHARED_RING_INIT(s);
    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);

    dev->ring_ref = gnttab_grant_access(dev->dom, virt_to_mfn(s), 0);

    dev->events = NULL;

again:
    err = xenbus_transaction_start(&xbt);
    if (err) {
        printk("starting transaction\n");
    }

    err = xenbus_printf(xbt, nodename, "ring-ref", "%u", dev->ring_ref);
    if (err) {
        message = "writing ring-ref";
        goto abort_transaction;
    }
    err = xenbus_printf(xbt, nodename, "event-channel", "%u", dev->evtchn);
    if (err) {
        message = "writing event-channel";
        goto abort_transaction;
    }
    err = xenbus_printf(xbt, nodename, "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
    if (err) {
        message = "writing protocol";
        goto abort_transaction;
    }

    snprintf(path, sizeof(path), "%s/state", nodename);
    err = xenbus_switch_state(xbt, path, XenbusStateConnected);
    if (err) {
        message = "switching state";
        goto abort_transaction;
    }

    err = xenbus_transaction_end(xbt, 0, &retry);
    if (retry) {
        /* The transaction raced with another update; log and retry. */
        printk("completing transaction\n");
        goto again;
    }

    goto done;

abort_transaction:
    xenbus_transaction_end(xbt, 1, &retry);
    printk("Abort transaction: %s\n", message);
    goto error;

done:

    snprintf(path, sizeof(path), "%s/backend", nodename);
    msg = xenbus_read(XBT_NIL, path, &dev->backend);
    if (msg) {
        printk("Error %s when reading the backend path %s\n", msg, path);
        goto error;
    }

    printk("backend at %s\n", dev->backend);

    dev->handle = strtoul(strrchr(nodename, '/') + 1, NULL, 0);

    {
        XenbusState state;
        char path[strlen(dev->backend) + 1 + 19 + 1];
        snprintf(path, sizeof(path), "%s/mode", dev->backend);
        msg = xenbus_read(XBT_NIL, path, &c);
        if (msg) {
            printk("Error %s when reading the mode\n", msg);
            goto error;
        }
        if (*c == 'w')
            dev->info.mode = O_RDWR;
        else
            dev->info.mode = O_RDONLY;
        free(c);

        snprintf(path, sizeof(path), "%s/state", dev->backend);

        xenbus_watch_path_token(XBT_NIL, path, path, &dev->events);

        msg = NULL;
        state = xenbus_read_integer(path);
        while (msg == NULL && state < XenbusStateConnected)
            msg = xenbus_wait_for_state_change(path, &state, &dev->events);
        if (msg != NULL || state != XenbusStateConnected) {
            printk("backend not available, state=%d\n", state);
            xenbus_unwatch_path(XBT_NIL, path);
            goto error;
        }

        snprintf(path, sizeof(path), "%s/info", dev->backend);
        dev->info.info = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
        // FIXME: read_integer returns an int, so disk size limited to 1TB for now
        dev->info.sectors = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
        dev->info.sector_size = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
        dev->info.barrier = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
        dev->info.flush = xenbus_read_integer(path);

        *info = dev->info;
    }
    unmask_evtchn(dev->evtchn);

    printk("%u sectors of %u bytes\n", dev->info.sectors, dev->info.sector_size);
    printk("**************************\n");

    return dev;

error:
    free_blkfront(dev);
    return NULL;
}
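
/* Disconnect from the backend: drain in-flight requests with blkfront_sync(),
 * walk the frontend through Closing/Closed/Initialising while waiting for the
 * backend to follow, then remove our xenstore entries and free the device. */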
void shutdown_blkfront(struct blkfront_dev *dev)
{
    char* err = NULL;
    XenbusState state;

    char path[strlen(dev->backend) + 1 + 5 + 1];
    char nodename[strlen(dev->nodename) + strlen("/event-channel") + 1];

    blkfront_sync(dev);

    printk("close blk: backend=%s node=%s\n", dev->backend, dev->nodename);

    snprintf(path, sizeof(path), "%s/state", dev->backend);
    snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename);

    if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) {
        printk("shutdown_blkfront: error changing state to %d: %s\n",
               XenbusStateClosing, err);
        goto close;
    }
    state = xenbus_read_integer(path);
    while (err == NULL && state < XenbusStateClosing)
        err = xenbus_wait_for_state_change(path, &state, &dev->events);

    if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) {
        printk("shutdown_blkfront: error changing state to %d: %s\n",
               XenbusStateClosed, err);
        goto close;
    }
    state = xenbus_read_integer(path);
    if (state < XenbusStateClosed)
        xenbus_wait_for_state_change(path, &state, &dev->events);

    if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) {
        printk("shutdown_blkfront: error changing state to %d: %s\n",
               XenbusStateInitialising, err);
        goto close;
    }
    err = NULL;
    state = xenbus_read_integer(path);
    while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed))
        err = xenbus_wait_for_state_change(path, &state, &dev->events);

close:
    xenbus_unwatch_path(XBT_NIL, path);

    /* "nodename" still holds the frontend's state path at this point, so
     * rebuild the entries to remove from dev->nodename. */
    snprintf(nodename, sizeof(nodename), "%s/ring-ref", dev->nodename);
    xenbus_rm(XBT_NIL, nodename);
    snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename);
    xenbus_rm(XBT_NIL, nodename);

    free_blkfront(dev);
}
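
/* Block until the shared ring has a free request slot, polling for responses
 * and sleeping on blkfront_queue in between. Interrupts are disabled around
 * the wait to avoid missing a wakeup from the handler. */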
static void blkfront_wait_slot(struct blkfront_dev *dev)
{
    /* Wait for a slot */
    if (RING_FULL(&dev->ring)) {
        unsigned long flags;
        DEFINE_WAIT(w);
        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);
            if (!RING_FULL(&dev->ring))
                break;
            /* Really no slot, go to sleep. */
            add_waiter(w, blkfront_queue);
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }
}

/* Issue an asynchronous read or write. The buffer, offset, and length must
 * all be sector-aligned, and the transfer may span at most
 * BLKIF_MAX_SEGMENTS_PER_REQUEST pages. */
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    struct blkif_request *req;
    RING_IDX i;
    int notify;
    int n, j;
    uintptr_t start, end;

    // Can't do I/O at a non-sector-aligned location
    ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1)));
    // Can't do I/O in non-sector-sized amounts
    ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1)));
    // Can't do I/O from a non-sector-aligned buffer
    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1)));

    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
    aiocbp->n = n = (end - start) / PAGE_SIZE;

    /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB,
     * so max 44KB can't happen */
    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);

    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
    req->nr_segments = n;
    req->handle = dev->handle;
    req->id = (uintptr_t) aiocbp;
    req->sector_number = aiocbp->aio_offset / 512;

    for (j = 0; j < n; j++) {
        req->seg[j].first_sect = 0;
        req->seg[j].last_sect = PAGE_SIZE / 512 - 1;
    }
    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / 512;
    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / 512;
    for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
        if (!write) {
            /* Trigger CoW if needed */
            *(char*)(data + (req->seg[j].first_sect << 9)) = 0;
            barrier();
        }
        aiocbp->gref[j] = req->seg[j].gref =
            gnttab_grant_access(dev->dom, virt_to_mfn(data), write);
    }

    dev->ring.req_prod_pvt = i + 1;

    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);

    if (notify) notify_remote_via_evtchn(dev->evtchn);
}
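
/* Completion callback used by the synchronous wrapper below: just mark the
 * aiocb as done. */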
static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret)
{
    aiocbp->data = (void*) 1;
}
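
/* Synchronous read or write: issue the aio and poll/sleep until the
 * completion callback fires. The aiocb must not already have a callback. */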
void blkfront_io(struct blkfront_aiocb *aiocbp, int write)
{
    unsigned long flags;
    DEFINE_WAIT(w);

    ASSERT(!aiocbp->aio_cb);
    aiocbp->aio_cb = blkfront_aio_cb;
    blkfront_aio(aiocbp, write);
    aiocbp->data = NULL;

    local_irq_save(flags);
    while (1) {
        blkfront_aio_poll(aiocbp->aio_dev);
        if (aiocbp->data)
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}
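
/* Queue a zero-segment request (barrier or cache flush) on the ring and
 * notify the backend if needed. */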
static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id)
{
    int i;
    struct blkif_request *req;
    int notify;

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);
    req->operation = op;
    req->nr_segments = 0;
    req->handle = dev->handle;
    req->id = id;
    /* Not needed anyway, but the backend will check it */
    req->sector_number = 0;
    dev->ring.req_prod_pvt = i + 1;
    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
    if (notify) notify_remote_via_evtchn(dev->evtchn);
}
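
/* Asynchronous variant: push a barrier/flush operation tagged with the given
 * aiocb so its callback runs on completion. */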
void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    blkfront_push_operation(dev, op, (uintptr_t) aiocbp);
}
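
/* Flush outstanding writes (issuing a barrier or cache flush if the backend
 * advertises one) and wait until the ring is completely idle. */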
void blkfront_sync(struct blkfront_dev *dev)
{
    unsigned long flags;
    DEFINE_WAIT(w);

    if (dev->info.mode == O_RDWR) {
        if (dev->info.barrier == 1)
            blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0);

        if (dev->info.flush == 1)
            blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0);
    }

    /* Note: This won't finish if another thread enqueues requests. */
    local_irq_save(flags);
    while (1) {
        blkfront_aio_poll(dev);
        if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}
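
/* Consume all pending responses from the ring, ending grant access for
 * completed transfers and invoking each request's completion callback.
 * Returns the number of responses consumed; safe against reentry from the
 * callbacks. */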
int blkfront_aio_poll(struct blkfront_dev *dev)
{
    RING_IDX rp, cons;
    struct blkif_response *rsp;
    int more;
    int nr_consumed;

moretodo:
#ifdef HAVE_LIBC
    if (dev->fd != -1) {
        files[dev->fd].read = 0;
        mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */
    }
#endif

    rp = dev->ring.sring->rsp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */
    cons = dev->ring.rsp_cons;

    nr_consumed = 0;
    while (cons != rp)
    {
        struct blkfront_aiocb *aiocbp;
        int status;

        rsp = RING_GET_RESPONSE(&dev->ring, cons);
        nr_consumed++;

        aiocbp = (void*) (uintptr_t) rsp->id;
        status = rsp->status;

        if (status != BLKIF_RSP_OKAY)
            printk("block error %d for op %d\n", status, rsp->operation);

        switch (rsp->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        {
            int j;

            for (j = 0; j < aiocbp->n; j++)
                gnttab_end_access(aiocbp->gref[j]);

            break;
        }

        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
            break;

        default:
            printk("unrecognized block operation %d response\n", rsp->operation);
        }

        dev->ring.rsp_cons = ++cons;
        /* Note: the callback frees aiocbp itself */
        if (aiocbp && aiocbp->aio_cb)
            aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
        if (dev->ring.rsp_cons != cons)
            /* We reentered; we must not continue here */
            break;
    }

    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
    if (more) goto moretodo;

    return nr_consumed;
}

#ifdef HAVE_LIBC
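/* Wrap the device in a Mini-OS file descriptor of type FTYPE_BLK so it can be
 * used through the libc file interface. */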
int blkfront_open(struct blkfront_dev *dev)
{
    dev->fd = alloc_fd(FTYPE_BLK);
    printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd);
    files[dev->fd].blk.dev = dev;
    return dev->fd;
}
#endif
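
#if 0
/* Usage sketch (illustrative only, not part of this driver): how a Mini-OS
 * application might synchronously read the first sector of the default
 * virtual block device with the API above. The page-allocated buffer and the
 * minimal error handling are assumptions made for the example. */
static void example_read_first_sector(void)
{
    struct blkfront_info info;
    struct blkfront_dev *dev;
    struct blkfront_aiocb aiocb;

    dev = init_blkfront(NULL, &info);   /* defaults to "device/vbd/768" */
    if (!dev)
        return;

    memset(&aiocb, 0, sizeof(aiocb));
    aiocb.aio_dev = dev;
    aiocb.aio_buf = (uint8_t*) alloc_page(); /* page-aligned, hence sector-aligned */
    aiocb.aio_nbytes = info.sector_size;     /* exactly one sector */
    aiocb.aio_offset = 0;

    blkfront_io(&aiocb, 0);             /* 0 = read; blocks until completion */
    printk("first byte: %02x\n", aiocb.aio_buf[0]);

    free_page(aiocb.aio_buf);
    shutdown_blkfront(dev);
}
#endif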