
annotate extras/mini-os/blkfront.c @ 17042:a905c582a406

Add stubdomain support. See stubdom/README for usage details.

- Move PAGE_SIZE and STACK_SIZE into __PAGE_SIZE and __STACK_SIZE in
  arch_limits.h so as to permit getting them from there without
  pulling in all the internal Mini-OS defines.
- Set up a xen-elf cross-compilation environment in stubdom/cross-root.
- Add a POSIX layer on top of Mini-OS by linking against the newlib C
  library and lwIP, and implementing the Unixish part in mini-os/lib/sys.c.
- Cross-compile zlib and libpci too.
- Add an xs.h-compatible layer on top of Mini-OS' xenbus.
- Cross-compile libxc with an additional xc_minios.c and a few things
  disabled.
- Cross-compile ioemu with an additional block-vbd, but without sound,
  tpm and other details. A few hacks are needed:
  - Align ide and scsi buffers at least on sector size to permit
    direct transmission to the block backend. While we are at it, just
    page-align them to possibly save a segment. Also, limit the scsi
    buffer size because of limitations of the block paravirtualization
    protocol.
  - Allocate big tables dynamically rather than letting them go to
    bss: when Mini-OS gets installed in memory, bss is not lazily
    allocated, and making it so would be unnecessarily tricky when we
    can simply use malloc (a minimal sketch follows this list).
- Change the Mini-OS build somewhat, so as to export the Mini-OS
  compilation flags to the libxc and ioemu Makefiles.
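
As a minimal sketch of the bss point above (the buffer name and size are
hypothetical, not the actual ioemu variables; this assumes newlib's
memalign is available through the POSIX layer):

    #include <malloc.h>

    /* Before: a large bss object, which Mini-OS would have to back with
     * real memory as soon as it is installed. */
    /* static uint8_t io_buffer[1024 * 1024]; */

    /* After: allocate at startup instead, page-aligned so the buffer can
     * also be handed to the block backend without bouncing. */
    static uint8_t *io_buffer;

    void init_io_buffer(void)
    {
        io_buffer = memalign(PAGE_SIZE, 1024 * 1024);
    }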

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Feb 12 14:35:39 2008 +0000 (2008-02-12)
parents 0672d5b11262
children c1003b9e0bb8
/* Minimal block driver for Mini-OS.
 * Copyright (c) 2007-2008 Samuel Thibault.
 * Based on netfront.c.
 */

#include <os.h>
#include <xenbus.h>
#include <events.h>
#include <errno.h>
#include <xen/io/blkif.h>
#include <gnttab.h>
#include <xmalloc.h>
#include <time.h>
#include <blkfront.h>
#include <lib.h>
#include <fcntl.h>

#ifndef HAVE_LIBC
#define strtoul simple_strtoul
#endif

/* Note: we generally don't need to disable IRQs since we hardly do anything in
 * the interrupt handler. */

/* Note: we really assume non-preemptive threads. */

DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);

#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
#define GRANT_INVALID_REF 0

struct blk_buffer {
    void* page;
    grant_ref_t gref;
};

struct blkfront_dev {
    domid_t dom;

    struct blkif_front_ring ring;
    grant_ref_t ring_ref;
    evtchn_port_t evtchn, local_port;
    blkif_vdev_t handle;

    char *nodename;
    char *backend;
    unsigned sector_size;
    unsigned sectors;
    int mode;
    int barrier;
    int flush;

#ifdef HAVE_LIBC
    int fd;
#endif
};

static inline int xenblk_rxidx(RING_IDX idx)
{
    return idx & (BLK_RING_SIZE - 1);
}

void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
{
#ifdef HAVE_LIBC
    struct blkfront_dev *dev = data;
    int fd = dev->fd;

    files[fd].read = 1;
#endif
    wake_up(&blkfront_queue);
}

struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode)
{
    xenbus_transaction_t xbt;
    char* err;
    char* message = NULL;
    struct blkif_sring *s;
    int retry = 0;
    char* msg;
    char* c;

    struct blkfront_dev *dev;

    if (!nodename)
        nodename = "device/vbd/768";

    char path[strlen(nodename) + 1 + 10 + 1];

    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);

    dev = malloc(sizeof(*dev));
    dev->nodename = strdup(nodename);

    evtchn_alloc_unbound_t op;
    op.dom = DOMID_SELF;
    snprintf(path, sizeof(path), "%s/backend-id", nodename);
    dev->dom = op.remote_dom = xenbus_read_integer(path);
    HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
    clear_evtchn(op.port); /* Otherwise the handler would get invoked right away! */
    dev->local_port = bind_evtchn(op.port, blkfront_handler, dev);
    dev->evtchn = op.port;

    s = (struct blkif_sring*) alloc_page();
    memset(s, 0, PAGE_SIZE);

    SHARED_RING_INIT(s);
    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);

    dev->ring_ref = gnttab_grant_access(dev->dom, virt_to_mfn(s), 0);

    // FIXME: proper frees on failures
again:
    err = xenbus_transaction_start(&xbt);
    if (err) {
        printk("starting transaction\n");
    }

    err = xenbus_printf(xbt, nodename, "ring-ref", "%u",
                dev->ring_ref);
    if (err) {
        message = "writing ring-ref";
        goto abort_transaction;
    }
    err = xenbus_printf(xbt, nodename,
                "event-channel", "%u", dev->evtchn);
    if (err) {
        message = "writing event-channel";
        goto abort_transaction;
    }

    err = xenbus_printf(xbt, nodename, "state", "%u",
            4); /* connected */

    err = xenbus_transaction_end(xbt, 0, &retry);
    if (retry) {
        printk("completing transaction\n");
        goto again;
    }

    goto done;

abort_transaction:
    xenbus_transaction_end(xbt, 1, &retry);
    return NULL;

done:

    snprintf(path, sizeof(path), "%s/backend", nodename);
    msg = xenbus_read(XBT_NIL, path, &dev->backend);
    if (msg) {
        printk("Error %s when reading the backend path %s\n", msg, path);
        return NULL;
    }

    printk("backend at %s\n", dev->backend);

    dev->handle = strtoul(strrchr(nodename, '/') + 1, NULL, 0);

    {
        char path[strlen(dev->backend) + 1 + 19 + 1];
        snprintf(path, sizeof(path), "%s/mode", dev->backend);
        msg = xenbus_read(XBT_NIL, path, &c);
        if (msg) {
            printk("Error %s when reading the mode\n", msg);
            return NULL;
        }
        if (*c == 'w')
            *mode = dev->mode = O_RDWR;
        else
            *mode = dev->mode = O_RDONLY;
        free(c);

        snprintf(path, sizeof(path), "%s/state", dev->backend);

        xenbus_watch_path(XBT_NIL, path);

        xenbus_wait_for_value(path, "4");

        xenbus_unwatch_path(XBT_NIL, path);

        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
        // FIXME: read_integer returns an int, so disk size is limited to 1TB for now
        *sectors = dev->sectors = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
        *sector_size = dev->sector_size = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
        dev->barrier = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
        dev->flush = xenbus_read_integer(path);
    }

    printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
    printk("**************************\n");

    return dev;
}
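
/* Example usage (an illustrative sketch, not part of this file): a caller
 * typically brings the device up as follows; passing NULL for nodename
 * selects the "device/vbd/768" default above.
 *
 *     uint64_t sectors;
 *     unsigned sector_size;
 *     int mode;
 *     struct blkfront_dev *dev;
 *
 *     dev = init_blkfront(NULL, &sectors, &sector_size, &mode);
 *     if (!dev)
 *         return;  // init_blkfront returns NULL on error
 */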

void shutdown_blkfront(struct blkfront_dev *dev)
{
    char* err;
    char *nodename = dev->nodename;

    char path[strlen(dev->backend) + 1 + 5 + 1];

    blkfront_sync(dev);

    printk("close blk: backend at %s\n", dev->backend);

    snprintf(path, sizeof(path), "%s/state", dev->backend);
    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
    xenbus_wait_for_value(path, "5");

    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); /* closed */
    xenbus_wait_for_value(path, "6");

    unbind_evtchn(dev->local_port);

    free(nodename);
    free(dev->backend);
    free(dev);
}

static void blkfront_wait_slot(struct blkfront_dev *dev)
{
    /* Wait for a slot */
    if (RING_FULL(&dev->ring)) {
        unsigned long flags;
        DEFINE_WAIT(w);
        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);
            if (!RING_FULL(&dev->ring))
                break;
            /* Really no slot, go to sleep. */
            add_waiter(w, blkfront_queue);
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }
}

/* Issue an aio */
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    struct blkif_request *req;
    RING_IDX i;
    int notify;
    int n, j;
    uintptr_t start, end;

    // Can't do I/O at a non-sector-aligned location
    ASSERT(!(aiocbp->aio_offset & (dev->sector_size - 1)));
    // Can't do I/O in non-sector-sized amounts
    ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size - 1)));
    // Can't do I/O from a non-sector-aligned buffer
    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size - 1)));

    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
    aiocbp->n = n = (end - start) / PAGE_SIZE;

    /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB,
     * so the 44KB (BLKIF_MAX_SEGMENTS_PER_REQUEST pages) limit can never be
     * exceeded. */
    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);

    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
    req->nr_segments = n;
    req->handle = dev->handle;
    req->id = (uintptr_t) aiocbp;
    req->sector_number = aiocbp->aio_offset / dev->sector_size;

    for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
        if (!write) {
            /* Trigger CoW if needed */
            *(char*)data = 0;
            barrier();
        }
        aiocbp->gref[j] = req->seg[j].gref =
            gnttab_grant_access(dev->dom, virtual_to_mfn(data), write);
        req->seg[j].first_sect = 0;
        req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
    }
    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;

    dev->ring.req_prod_pvt = i + 1;

    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);

    if (notify) notify_remote_via_evtchn(dev->evtchn);
}

void blkfront_aio_write(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 1);
}

void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 0);
}
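
/* Example (an illustrative sketch): issuing an asynchronous read with the
 * aiocb fields this file relies on. The callback and buffer names are
 * hypothetical; the buffer, size, and offset must be sector-aligned, per
 * the ASSERTs in blkfront_aio().
 *
 *     static void my_cb(struct blkfront_aiocb *aiocb, int ret)
 *     {
 *         // ret is 0 on success, -EIO on error; free aiocb here.
 *     }
 *
 *     struct blkfront_aiocb *aiocbp = malloc(sizeof(*aiocbp));
 *     memset(aiocbp, 0, sizeof(*aiocbp));
 *     aiocbp->aio_dev    = dev;
 *     aiocbp->aio_buf    = buf;        // page-aligned buffer
 *     aiocbp->aio_nbytes = 4096;       // multiple of sector_size
 *     aiocbp->aio_offset = 0;          // byte offset on the disk
 *     aiocbp->aio_cb     = my_cb;
 *     blkfront_aio_read(aiocbp);
 */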

int blkfront_aio_poll(struct blkfront_dev *dev)
{
    RING_IDX rp, cons;
    struct blkif_response *rsp;

moretodo:
    rp = dev->ring.sring->rsp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */
    cons = dev->ring.rsp_cons;

    int nr_consumed = 0;
    while (cons != rp)
    {
        rsp = RING_GET_RESPONSE(&dev->ring, cons);

        if (rsp->status != BLKIF_RSP_OKAY)
            printk("block error %d for op %d\n", rsp->status, rsp->operation);

        switch (rsp->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        {
            struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
            int j;

            for (j = 0; j < aiocbp->n; j++)
                gnttab_end_access(aiocbp->gref[j]);

            /* Note: the callback frees aiocbp itself */
            aiocbp->aio_cb(aiocbp, rsp->status ? -EIO : 0);
            break;
        }
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
            break;
        default:
            printk("unrecognized block operation %d response\n", rsp->operation);
            break;
        }

        nr_consumed++;
        ++cons;
    }
    dev->ring.rsp_cons = cons;

    int more;
    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
    if (more) goto moretodo;

    return nr_consumed;
}
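
/* Example (an illustrative sketch): completions are reaped by calling
 * blkfront_aio_poll(), typically in a loop that sleeps on blkfront_queue
 * when nothing is pending -- the same pattern blkfront_sync() uses below:
 *
 *     while (!done) {               // 'done' set by the aio callback
 *         blkfront_aio_poll(dev);
 *         if (done)
 *             break;
 *         // block on blkfront_queue via add_waiter()/schedule()
 *     }
 */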

static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
{
    int i;
    struct blkif_request *req;
    int notify;

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);
    req->operation = op;
    req->nr_segments = 0;
    req->handle = dev->handle;
    /* Not used */
    req->id = 0;
    /* Not needed anyway, but the backend will check it */
    req->sector_number = 0;
    dev->ring.req_prod_pvt = i + 1;
    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
    if (notify) notify_remote_via_evtchn(dev->evtchn);
}

void blkfront_sync(struct blkfront_dev *dev)
{
    unsigned long flags;

    if (dev->mode == O_RDWR) {
        if (dev->barrier == 1)
            blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);

        if (dev->flush == 1)
            blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);
    }

    /* Note: This won't finish if another thread enqueues requests. */
    local_irq_save(flags);
    DEFINE_WAIT(w);
    while (1) {
        blkfront_aio_poll(dev);
        if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}

#ifdef HAVE_LIBC
int blkfront_open(struct blkfront_dev *dev)
{
    dev->fd = alloc_fd(FTYPE_BLK);
    printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd);
    files[dev->fd].blk.dev = dev;
    return dev->fd;
}
#endif