ia64/xen-unstable

annotate extras/mini-os/blkfront.c @ 16934:1a357a1504b2

minios: Fix a bug where blkfront reads into a zero-mapped buffer,
by just poking the page first.
There is no need to use virtual_to_mfn() for the ring, since that is a real page.

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Jan 29 15:18:27 2008 +0000 (2008-01-29)
parents ed540d61339e
children c9844192c965
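
The fix is small: before granting a buffer page to the backend for a read,
the frontend now pokes one byte into it. A freshly allocated buffer may still
be zero-mapped, and writing to it triggers a copy-on-write fault so the page
gets backed by a real machine frame; virtual_to_mfn() then yields an MFN the
backend can actually write into. A minimal sketch of the pattern, using the
same calls as the driver below (gref and data stand for a segment's grant
reference and page address):

    if (!write) {
        /* Force a CoW fault so the page is backed by a real frame. */
        *(char*)data = 0;
        barrier();
    }
    gref = gnttab_grant_access(0, virtual_to_mfn(data), write);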
/* Minimal block driver for Mini-OS.
 * Copyright (c) 2007-2008 Samuel Thibault.
 * Based on netfront.c.
 */

#include <os.h>
#include <xenbus.h>
#include <events.h>
#include <errno.h>
#include <xen/io/blkif.h>
#include <gnttab.h>
#include <xmalloc.h>
#include <time.h>
#include <blkfront.h>
#include <lib.h>
#include <fcntl.h>

/* Note: we generally don't need to disable IRQs, since we hardly do anything
 * in the interrupt handler. */

/* Note: this code assumes non-preemptive threads. */

DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);

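/* BLK_RING_SIZE is the number of request slots that fit in one page of
 * shared ring; GRANT_INVALID_REF denotes an unused grant reference. */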
#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
#define GRANT_INVALID_REF 0

struct blk_buffer {
    void* page;
    grant_ref_t gref;
};

struct blkfront_dev {
    struct blkif_front_ring ring;
    grant_ref_t ring_ref;
    evtchn_port_t evtchn, local_port;
    blkif_vdev_t handle;

    char *nodename;
    char *backend;
    unsigned sector_size;
    unsigned sectors;
    int mode;
    int barrier;
    int flush;
};

static inline int xenblk_rxidx(RING_IDX idx)
{
    return idx & (BLK_RING_SIZE - 1);
}

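/* The interrupt handler only wakes up sleepers; responses are actually
 * consumed from thread context by blkfront_aio_poll(). */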
void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
{
    wake_up(&blkfront_queue);
}

struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode)
{
    xenbus_transaction_t xbt;
    char* err;
    char* message = NULL;
    struct blkif_sring *s;
    int retry = 0;
    char* msg;
    char* c;

    struct blkfront_dev *dev;

    if (!nodename)
        nodename = "device/vbd/768";

    char path[strlen(nodename) + 1 + 10 + 1];

    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);

    dev = malloc(sizeof(*dev));
    dev->nodename = strdup(nodename);

    s = (struct blkif_sring*) alloc_page();
    memset(s, 0, PAGE_SIZE);

    SHARED_RING_INIT(s);
    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);

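    /* Grant the backend access to the shared ring. alloc_page() returns a
     * real page, so plain virt_to_mfn() is safe here. */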
    dev->ring_ref = gnttab_grant_access(0, virt_to_mfn(s), 0);

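    /* Allocate an unbound event channel for the backend (whose domain id we
     * read from xenstore) and bind our handler to the local end. */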
    evtchn_alloc_unbound_t op;
    op.dom = DOMID_SELF;
    snprintf(path, sizeof(path), "%s/backend-id", nodename);
    op.remote_dom = xenbus_read_integer(path);
    HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
    clear_evtchn(op.port); /* Otherwise the handler would be invoked right away! */
    dev->local_port = bind_evtchn(op.port, blkfront_handler, dev);
    dev->evtchn = op.port;

    // FIXME: proper frees on failures
again:
    err = xenbus_transaction_start(&xbt);
    if (err) {
        printk("Error starting transaction\n");
    }

    err = xenbus_printf(xbt, nodename, "ring-ref", "%u", dev->ring_ref);
    if (err) {
        message = "writing ring-ref";
        goto abort_transaction;
    }
    err = xenbus_printf(xbt, nodename, "event-channel", "%u", dev->evtchn);
    if (err) {
        message = "writing event-channel";
        goto abort_transaction;
    }

    err = xenbus_printf(xbt, nodename, "state", "%u", 4); /* connected */
    if (err) {
        message = "switching state";
        goto abort_transaction;
    }

    err = xenbus_transaction_end(xbt, 0, &retry);
    if (retry) {
        printk("completing transaction\n");
        goto again;
    }

    goto done;

abort_transaction:
    printk("Abort transaction: %s\n", message);
    xenbus_transaction_end(xbt, 1, &retry);
    return NULL;

done:

    snprintf(path, sizeof(path), "%s/backend", nodename);
    msg = xenbus_read(XBT_NIL, path, &dev->backend);
    if (msg) {
        printk("Error %s when reading the backend path %s\n", msg, path);
        return NULL;
    }

    printk("backend at %s\n", dev->backend);

    dev->handle = simple_strtoul(strrchr(nodename, '/') + 1, NULL, 0);

    {
        char path[strlen(dev->backend) + 1 + 19 + 1];
        snprintf(path, sizeof(path), "%s/mode", dev->backend);
        msg = xenbus_read(XBT_NIL, path, &c);
        if (msg) {
            printk("Error %s when reading the mode\n", msg);
            return NULL;
        }
        if (*c == 'w')
            *mode = dev->mode = O_RDWR;
        else
            *mode = dev->mode = O_RDONLY;
        free(c);

        snprintf(path, sizeof(path), "%s/state", dev->backend);

        xenbus_watch_path(XBT_NIL, path);

        xenbus_wait_for_value(path, "4");

        xenbus_unwatch_path(XBT_NIL, path);

        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
        // FIXME: read_integer returns an int, so disk size limited to 1TB for now
        *sectors = dev->sectors = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
        *sector_size = dev->sector_size = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
        dev->barrier = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
        dev->flush = xenbus_read_integer(path);
    }

    printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
    printk("**************************\n");

    return dev;
}

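/* Shut down cleanly: flush outstanding requests, then walk the backend
 * through state 5 (closing) and state 6 (closed) before freeing resources. */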
void shutdown_blkfront(struct blkfront_dev *dev)
{
    char* err;
    char *nodename = dev->nodename;

    char path[strlen(dev->backend) + 1 + 5 + 1];

    blkfront_sync(dev);

    printk("close blk: backend at %s\n", dev->backend);

    snprintf(path, sizeof(path), "%s/state", dev->backend);
    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
    xenbus_wait_for_value(path, "5");

    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); /* closed */
    xenbus_wait_for_value(path, "6");

    unbind_evtchn(dev->local_port);

    free(nodename);
    free(dev->backend);
    free(dev);
}

static void blkfront_wait_slot(struct blkfront_dev *dev)
{
    /* Wait for a slot */
    if (RING_FULL(&dev->ring)) {
        unsigned long flags;
        DEFINE_WAIT(w);
        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);
            if (!RING_FULL(&dev->ring))
                break;
            /* Really no slot, go to sleep. */
            add_waiter(w, blkfront_queue);
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }
}

/* Issue an asynchronous read or write: grant each page of the buffer to the
 * backend and queue a single request on the ring. */
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    struct blkif_request *req;
    RING_IDX i;
    int notify;
    int n, j;
    uintptr_t start, end;

    // Can't do I/O at a non-sector-aligned location
    ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
    // Can't do I/O in non-sector-sized amounts
    ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
    // Can't do I/O to a non-sector-aligned buffer
    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));

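    /* Round the buffer out to whole pages; each page becomes one segment of
     * the ring request. */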
    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
    n = (end - start) / PAGE_SIZE;

    /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB,
     * so requests bigger than 44KB (BLKIF_MAX_SEGMENTS_PER_REQUEST pages) can't happen. */
    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);

    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
    req->nr_segments = n;
    req->handle = dev->handle;
    req->id = (uintptr_t) aiocbp;
    req->sector_number = aiocbp->aio_offset / dev->sector_size;

    for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
        if (!write) {
            /* Trigger CoW if needed: the backend will write to this page, so
             * it must be backed by a real frame before we grant its MFN. */
            *(char*)data = 0;
            barrier();
        }
        aiocbp->gref[j] = req->seg[j].gref =
            gnttab_grant_access(0, virtual_to_mfn(data), write);
        req->seg[j].first_sect = 0;
        req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
    }
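    /* The buffer need not start or end on a page boundary: trim the first
     * and last segments to the sectors it actually covers. */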
    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;

    dev->ring.req_prod_pvt = i + 1;

    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);

    if (notify) notify_remote_via_evtchn(dev->evtchn);
}

void blkfront_aio_write(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 1);
}

void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 0);
}

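/* Consume all pending responses: for completed reads/writes, end the grants
 * and invoke the completion callback. Returns the number of responses
 * consumed. */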
int blkfront_aio_poll(struct blkfront_dev *dev)
{
    RING_IDX rp, cons;
    struct blkif_response *rsp;

moretodo:
    rp = dev->ring.sring->rsp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */
    cons = dev->ring.rsp_cons;

    int nr_consumed = 0;
    while (cons != rp)
    {
        rsp = RING_GET_RESPONSE(&dev->ring, cons);

        switch (rsp->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        {
            struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
            /* End exactly the grants that blkfront_aio() created: count the
             * pages spanned by the (possibly page-unaligned) buffer. */
            uintptr_t start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
            uintptr_t end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
            int n = (end - start) / PAGE_SIZE, j;
            for (j = 0; j < n; j++)
                gnttab_end_access(aiocbp->gref[j]);

            /* Note: the callback frees aiocbp itself */
            aiocbp->aio_cb(aiocbp, rsp->status ? -EIO : 0);
            break;
        }
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
            break;
        default:
            printk("unrecognized block operation %d response\n", rsp->operation);
            break;
        }

        nr_consumed++;
        ++cons;
    }
    dev->ring.rsp_cons = cons;

    int more;
    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
    if (more) goto moretodo;

    return nr_consumed;
}

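/* Queue a request that carries no data (write barrier or cache flush) and
 * kick the backend. */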
static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
{
    int i;
    struct blkif_request *req;
    int notify;

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);
    req->operation = op;
    /* Barrier/flush requests carry no data: make sure the backend does not
     * see a stale segment count or handle from a previous request. */
    req->nr_segments = 0;
    req->handle = dev->handle;
    dev->ring.req_prod_pvt = i + 1;
    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
    if (notify) notify_remote_via_evtchn(dev->evtchn);
}

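/* Flush the disk: issue a barrier and/or a cache flush if the backend
 * advertises the feature, then wait until the ring is completely idle. */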
void blkfront_sync(struct blkfront_dev *dev)
{
    unsigned long flags;

    if (dev->barrier == 1)
        blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);

    if (dev->flush == 1)
        blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);

    /* Note: This won't finish if another thread enqueues requests. */
    local_irq_save(flags);
    DEFINE_WAIT(w);
    while (1) {
        blkfront_aio_poll(dev);
        if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}