
extras/mini-os/blkfront.c @ 17397:6bf674bd386d

stubdom: add asynchronous disk flush support

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Sat Apr 05 22:20:25 2008 +0100
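
The flush support this changeset adds is asynchronous: a caller queues a BLKIF_OP_FLUSH_DISKCACHE request with blkfront_aio_push_operation() and collects the completion through blkfront_aio_poll(). A minimal caller-side sketch (flush_done() and the done flag are illustrative names, not part of this file; the backend must advertise feature-flush-cache):

    static int done;

    static void flush_done(struct blkfront_aiocb *aiocb, int ret)
    {
        /* ret is 0 on success, -EIO on a backend error */
        done = 1;
    }

    void flush_example(struct blkfront_dev *dev)
    {
        struct blkfront_aiocb aiocb = { .aio_dev = dev, .aio_cb = flush_done };

        done = 0;
        blkfront_aio_push_operation(&aiocb, BLKIF_OP_FLUSH_DISKCACHE);
        while (!done)
            blkfront_aio_poll(dev);  /* invokes flush_done() on completion */
    }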
/* Minimal block driver for Mini-OS.
 * Copyright (c) 2007-2008 Samuel Thibault.
 * Based on netfront.c.
 */

#include <os.h>
#include <xenbus.h>
#include <events.h>
#include <errno.h>
#include <xen/io/blkif.h>
#include <gnttab.h>
#include <xmalloc.h>
#include <time.h>
#include <blkfront.h>
#include <lib.h>
#include <fcntl.h>

#ifndef HAVE_LIBC
#define strtoul simple_strtoul
#endif

/* Note: we generally don't need to disable IRQs since we hardly do anything
 * in the interrupt handler. */

/* Note: we assume non-preemptive threads. */

DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);

#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
#define GRANT_INVALID_REF 0

struct blk_buffer {
    void* page;
    grant_ref_t gref;
};

struct blkfront_dev {
    domid_t dom;

    struct blkif_front_ring ring;
    grant_ref_t ring_ref;
    evtchn_port_t evtchn;
    blkif_vdev_t handle;

    char *nodename;
    char *backend;
    struct blkfront_info info;

#ifdef HAVE_LIBC
    int fd;
#endif
};

void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
{
#ifdef HAVE_LIBC
    struct blkfront_dev *dev = data;
    int fd = dev->fd;

    files[fd].read = 1;
#endif
    wake_up(&blkfront_queue);
}

struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info)
{
    xenbus_transaction_t xbt;
    char* err;
    char* message=NULL;
    struct blkif_sring *s;
    int retry=0;
    char* msg;
    char* c;

    struct blkfront_dev *dev;

    if (!nodename)
        nodename = "device/vbd/768";

    char path[strlen(nodename) + 1 + 10 + 1];

    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);

    dev = malloc(sizeof(*dev));
    dev->nodename = strdup(nodename);

    snprintf(path, sizeof(path), "%s/backend-id", nodename);
    dev->dom = xenbus_read_integer(path);
    evtchn_alloc_unbound(dev->dom, blkfront_handler, dev, &dev->evtchn);

    s = (struct blkif_sring*) alloc_page();
    memset(s, 0, PAGE_SIZE);

    SHARED_RING_INIT(s);
    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);

    dev->ring_ref = gnttab_grant_access(dev->dom, virt_to_mfn(s), 0);

    // FIXME: proper frees on failures
again:
    err = xenbus_transaction_start(&xbt);
    if (err) {
        printk("error starting transaction\n");
    }

    err = xenbus_printf(xbt, nodename, "ring-ref", "%u", dev->ring_ref);
    if (err) {
        message = "writing ring-ref";
        goto abort_transaction;
    }
    err = xenbus_printf(xbt, nodename, "event-channel", "%u", dev->evtchn);
    if (err) {
        message = "writing event-channel";
        goto abort_transaction;
    }

    err = xenbus_printf(xbt, nodename, "state", "%u", 4); /* connected */

    err = xenbus_transaction_end(xbt, 0, &retry);
    if (retry) {
        /* The transaction raced with another one; restart it. */
        goto again;
    }

    goto done;

abort_transaction:
    xenbus_transaction_end(xbt, 1, &retry);
    printk("Abort transaction: %s\n", message);
    return NULL;

142 snprintf(path, sizeof(path), "%s/backend", nodename);
143 msg = xenbus_read(XBT_NIL, path, &dev->backend);
144 if (msg) {
145 printk("Error %s when reading the backend path %s\n", msg, path);
146 return NULL;
147 }
149 printk("backend at %s\n", dev->backend);
151 dev->handle = strtoul(strrchr(nodename, '/')+1, NULL, 0);
    {
        char path[strlen(dev->backend) + 1 + 19 + 1];
        snprintf(path, sizeof(path), "%s/mode", dev->backend);
        msg = xenbus_read(XBT_NIL, path, &c);
        if (msg) {
            printk("Error %s when reading the mode\n", msg);
            return NULL;
        }
        if (*c == 'w')
            dev->info.mode = O_RDWR;
        else
            dev->info.mode = O_RDONLY;
        free(c);

        snprintf(path, sizeof(path), "%s/state", dev->backend);

        xenbus_watch_path(XBT_NIL, path);

        xenbus_wait_for_value(path, "4");

        xenbus_unwatch_path(XBT_NIL, path);

        snprintf(path, sizeof(path), "%s/info", dev->backend);
        dev->info.info = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
        // FIXME: read_integer returns an int, so disk size limited to 1TB for now
        dev->info.sectors = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
        dev->info.sector_size = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
        dev->info.barrier = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
        dev->info.flush = xenbus_read_integer(path);

        *info = dev->info;
    }
    unmask_evtchn(dev->evtchn);

    printk("%u sectors of %u bytes\n", dev->info.sectors, dev->info.sector_size);
    printk("**************************\n");

    return dev;
}
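
/* Usage sketch (illustrative, not part of this file): bring up the default
 * VBD and report its size from the info structure filled in above.
 *
 *     struct blkfront_info info;
 *     struct blkfront_dev *dev = init_blkfront(NULL, &info);
 *     if (dev)
 *         printk("disk: %u sectors of %u bytes, %s\n", info.sectors,
 *                info.sector_size, info.mode == O_RDWR ? "rw" : "ro");
 */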
void shutdown_blkfront(struct blkfront_dev *dev)
{
    char* err;
    char *nodename = dev->nodename;

    char path[strlen(dev->backend) + 1 + 5 + 1];

    blkfront_sync(dev);

    printk("close blk: backend at %s\n", dev->backend);

    snprintf(path, sizeof(path), "%s/state", dev->backend);
    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
    xenbus_wait_for_value(path, "5");

    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); /* closed */
    xenbus_wait_for_value(path, "6");

    unbind_evtchn(dev->evtchn);

    free(nodename);
    free(dev->backend);
    free(dev);
}

static void blkfront_wait_slot(struct blkfront_dev *dev)
{
    /* Wait for a slot */
    if (RING_FULL(&dev->ring)) {
        unsigned long flags;
        DEFINE_WAIT(w);
        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);
            if (!RING_FULL(&dev->ring))
                break;
            /* Really no slot, go to sleep. */
            add_waiter(w, blkfront_queue);
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }
}

/* Issue an aio */
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    struct blkif_request *req;
    RING_IDX i;
    int notify;
    int n, j;
    uintptr_t start, end;

    // The I/O offset must be sector-aligned
    ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1)));
    // The I/O length must be a multiple of the sector size
    ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1)));
    // The buffer must be sector-aligned
    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1)));

    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
    aiocbp->n = n = (end - start) / PAGE_SIZE;

    /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB,
     * so a request larger than 44KB (BLKIF_MAX_SEGMENTS_PER_REQUEST pages)
     * can't happen. */
    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);

    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
    req->nr_segments = n;
    req->handle = dev->handle;
    req->id = (uintptr_t) aiocbp;
    req->sector_number = aiocbp->aio_offset / dev->info.sector_size;

    for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
        if (!write) {
            /* Trigger CoW if needed */
            *(char*)data = 0;
            barrier();
        }
        aiocbp->gref[j] = req->seg[j].gref =
            gnttab_grant_access(dev->dom, virtual_to_mfn(data), write);
        req->seg[j].first_sect = 0;
        req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1;
    }
    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size;
    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size;

    dev->ring.req_prod_pvt = i + 1;

    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);

    if (notify) notify_remote_via_evtchn(dev->evtchn);
}
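
/* Usage sketch (illustrative): read the first page of the disk. The buffer,
 * offset and length obey the alignment ASSERTs above; read_done() and buf
 * are hypothetical caller-side names.
 *
 *     static char buf[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
 *
 *     static void read_done(struct blkfront_aiocb *aiocb, int ret) { ... }
 *
 *     struct blkfront_aiocb aiocb = {
 *         .aio_dev    = dev,
 *         .aio_buf    = buf,
 *         .aio_nbytes = PAGE_SIZE,
 *         .aio_offset = 0,
 *         .aio_cb     = read_done,
 *     };
 *     blkfront_aio_read(&aiocb);
 *     // completion is delivered from blkfront_aio_poll() below
 */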
void blkfront_aio_write(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 1);
}

void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 0);
}

static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id)
{
    int i;
    struct blkif_request *req;
    int notify;

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);
    req->operation = op;
    req->nr_segments = 0;
    req->handle = dev->handle;
    req->id = id;
    /* Not strictly needed, but the backend will check it */
    req->sector_number = 0;
    dev->ring.req_prod_pvt = i + 1;
    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
    if (notify) notify_remote_via_evtchn(dev->evtchn);
}

void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    blkfront_push_operation(dev, op, (uintptr_t) aiocbp);
}

void blkfront_sync(struct blkfront_dev *dev)
{
    unsigned long flags;

    if (dev->info.mode == O_RDWR) {
        if (dev->info.barrier == 1)
            blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0);

        if (dev->info.flush == 1)
            blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0);
    }

    /* Note: This won't finish if another thread enqueues requests. */
    local_irq_save(flags);
    DEFINE_WAIT(w);
    while (1) {
        blkfront_aio_poll(dev);
        if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}

int blkfront_aio_poll(struct blkfront_dev *dev)
{
    RING_IDX rp, cons;
    struct blkif_response *rsp;
    int more;

moretodo:
#ifdef HAVE_LIBC
    files[dev->fd].read = 0;
    mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */
#endif

    rp = dev->ring.sring->rsp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */
    cons = dev->ring.rsp_cons;

    int nr_consumed = 0;
    while (cons != rp)
    {
        rsp = RING_GET_RESPONSE(&dev->ring, cons);
        nr_consumed++;

        struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
        int status = rsp->status;

        if (status != BLKIF_RSP_OKAY)
            printk("block error %d for op %d\n", status, rsp->operation);

        switch (rsp->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        {
            int j;

            for (j = 0; j < aiocbp->n; j++)
                gnttab_end_access(aiocbp->gref[j]);

            break;
        }

        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
            break;

        default:
            printk("unrecognized block operation %d response\n", rsp->operation);
        }

        dev->ring.rsp_cons = ++cons;
        /* Note: the callback may free aiocbp itself. */
        if (aiocbp && aiocbp->aio_cb)
            aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
        if (dev->ring.rsp_cons != cons)
            /* We reentered; we must not continue here */
            break;
    }

    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
    if (more) goto moretodo;

    return nr_consumed;
}
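
/* Usage sketch (illustrative): callers typically poll until a flag set from
 * their aio_cb callback fires; blkfront_sync() above shows the blocking
 * variant of the same pattern using blkfront_queue.
 *
 *     while (!my_done_flag)
 *         blkfront_aio_poll(dev);
 */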
#ifdef HAVE_LIBC
int blkfront_open(struct blkfront_dev *dev)
{
    dev->fd = alloc_fd(FTYPE_BLK);
    printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd);
    files[dev->fd].blk.dev = dev;
    return dev->fd;
}
#endif