ia64/xen-unstable

changeset 16797:764d6741de07

minios: add blkfront

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jan 17 16:41:46 2008 +0000 (2008-01-17)
parents bf828db8d017
children a1a3fe600ef3
files extras/mini-os/blkfront.c extras/mini-os/include/blkfront.h
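This changeset introduces a minimal paravirtualized block frontend for
Mini-OS, modelled on the existing netfront.c: init_blkfront() binds an
event channel, shares a blkif ring page with the backend, and negotiates
mode, sector count, sector size and the barrier/flush features over
xenbus; blkfront_aio() queues sector-aligned reads and writes on the
ring; blkfront_aio_poll() reaps completed responses and invokes the
per-request callback; blkfront_sync() drains the ring, issuing a barrier
or cache flush when the backend advertises one.

As an illustration only (not part of the changeset), a Mini-OS
application could drive the interface roughly as follows; the xenstore
device path is an assumption about the guest's configuration, and
alloc_page()/printk() are the usual Mini-OS helpers:

    /* Hedged usage sketch: read the first sector of a vbd and poll
     * until the completion callback fires. */
    #include <blkfront.h>

    static int done;

    static void read_cb(struct blkfront_aiocb *aiocb, int ret)
    {
        if (ret)
            printk("read failed: %d\n", ret);
        done = 1;
    }

    void test_blkfront(void)
    {
        uint64_t sectors;
        unsigned sector_size;
        int mode;
        struct blkfront_aiocb aiocb;
        struct blkfront_dev *dev;

        /* "/local/domain/1/device/vbd/769" is a made-up path; the
         * "/local/domain/<n>/" prefix is stripped by init_blkfront()
         * itself. */
        dev = init_blkfront("/local/domain/1/device/vbd/769",
                            &sectors, &sector_size, &mode);

        aiocb.aio_dev = dev;
        aiocb.aio_buf = (uint8_t*) alloc_page(); /* page-aligned, hence sector-aligned */
        aiocb.aio_nbytes = sector_size;
        aiocb.aio_offset = 0;
        aiocb.aio_cb = read_cb;

        blkfront_aio_read(&aiocb);
        while (!done)
            blkfront_aio_poll(dev);

        shutdown_blkfront(dev);
    }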
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extras/mini-os/blkfront.c	Thu Jan 17 16:41:46 2008 +0000
@@ -0,0 +1,398 @@
+/* Minimal block driver for Mini-OS.
+ * Copyright (c) 2007-2008 Samuel Thibault.
+ * Based on netfront.c.
+ */
+
+#include <os.h>
+#include <xenbus.h>
+#include <events.h>
+#include <errno.h>
+#include <xen/io/blkif.h>
+#include <gnttab.h>
+#include <xmalloc.h>
+#include <time.h>
+#include <blkfront.h>
+#include <lib.h>
+#include <fcntl.h>
+
+/* Note: we generally don't need to disable IRQs since we hardly do anything in
+ * the interrupt handler.  */
+
+/* Note: we really assume non-preemptive threads.  */
+
+DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);
+
+
+
+
+#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
+#define GRANT_INVALID_REF 0
+
+
+struct blk_buffer {
+    void* page;
+    grant_ref_t gref;
+};
+
+struct blkfront_dev {
+    struct blkif_front_ring ring;
+    grant_ref_t ring_ref;
+    evtchn_port_t evtchn, local_port;
+    blkif_vdev_t handle;
+
+    char *nodename;
+    char *backend;
+    unsigned sector_size;
+    unsigned sectors;
+    int mode;
+    int barrier;
+    int flush;
+};
+
+static inline int xenblk_rxidx(RING_IDX idx)
+{
+    return idx & (BLK_RING_SIZE - 1);
+}
+
+void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
+{
+    wake_up(&blkfront_queue);
+}
+
+struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode)
+{
+    xenbus_transaction_t xbt;
+    char* err;
+    char* message=NULL;
+    struct blkif_sring *s;
+    int retry=0;
+    char* msg;
+    char* c;
+
+    struct blkfront_dev *dev;
+
+    ASSERT(!strncmp(nodename, "/local/domain/", 14));
+    nodename = strchr(nodename + 14, '/') + 1;
+
+    char path[strlen(nodename) + 1 + 10 + 1];
+
+    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);
+
+    dev = malloc(sizeof(*dev));
+    dev->nodename = strdup(nodename);
+
+    s = (struct blkif_sring*) alloc_page();
+    memset(s, 0, PAGE_SIZE);
+
+
+    SHARED_RING_INIT(s);
+    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);
+
+    dev->ring_ref = gnttab_grant_access(0, virt_to_mfn(s), 0);
+
+    evtchn_alloc_unbound_t op;
+    op.dom = DOMID_SELF;
+    snprintf(path, sizeof(path), "%s/backend-id", nodename);
+    op.remote_dom = xenbus_read_integer(path);
+    HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
+    clear_evtchn(op.port);        /* Otherwise the handler would get invoked right away! */
+    dev->local_port = bind_evtchn(op.port, blkfront_handler, dev);
+    dev->evtchn = op.port;
+
   1.105 +    // FIXME: proper frees on failures
   1.106 +again:
   1.107 +    err = xenbus_transaction_start(&xbt);
   1.108 +    if (err) {
   1.109 +        printk("starting transaction\n");
   1.110 +    }
   1.111 +
   1.112 +    err = xenbus_printf(xbt, nodename, "ring-ref","%u",
   1.113 +                dev->ring_ref);
   1.114 +    if (err) {
   1.115 +        message = "writing ring-ref";
   1.116 +        goto abort_transaction;
   1.117 +    }
   1.118 +    err = xenbus_printf(xbt, nodename,
   1.119 +                "event-channel", "%u", dev->evtchn);
   1.120 +    if (err) {
   1.121 +        message = "writing event-channel";
   1.122 +        goto abort_transaction;
   1.123 +    }
   1.124 +
   1.125 +    err = xenbus_printf(xbt, nodename, "state", "%u",
   1.126 +            4); /* connected */
   1.127 +
   1.128 +
   1.129 +    err = xenbus_transaction_end(xbt, 0, &retry);
   1.130 +    if (retry) {
   1.131 +            goto again;
   1.132 +        printk("completing transaction\n");
   1.133 +    }
   1.134 +
   1.135 +    goto done;
   1.136 +
   1.137 +abort_transaction:
   1.138 +    xenbus_transaction_end(xbt, 1, &retry);
   1.139 +    return NULL;
   1.140 +
   1.141 +done:
   1.142 +
+    snprintf(path, sizeof(path), "%s/backend", nodename);
+    msg = xenbus_read(XBT_NIL, path, &dev->backend);
+    if (msg) {
+        printk("Error %s when reading the backend path %s\n", msg, path);
+        return NULL;
+    }
+
+    printk("backend at %s\n", dev->backend);
+
+    dev->handle = simple_strtoul(strrchr(nodename, '/')+1, NULL, 0);
+
+    {
+        char path[strlen(dev->backend) + 1 + 19 + 1];
+        snprintf(path, sizeof(path), "%s/mode", dev->backend);
+        msg = xenbus_read(XBT_NIL, path, &c);
+        if (msg) {
+            printk("Error %s when reading the mode\n", msg);
+            return NULL;
+        }
+        if (*c == 'w')
+            *mode = dev->mode = O_RDWR;
+        else
+            *mode = dev->mode = O_RDONLY;
+        free(c);
+
+        snprintf(path, sizeof(path), "%s/state", dev->backend);
+
+        xenbus_watch_path(XBT_NIL, path);
+
+        xenbus_wait_for_value(path, "4");
+
+        xenbus_unwatch_path(XBT_NIL, path);
+
+        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
+        // FIXME: xenbus_read_integer returns an int, so with 512-byte sectors the disk size is limited to 1TB for now
+        *sectors = dev->sectors = xenbus_read_integer(path);
+
+        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
+        *sector_size = dev->sector_size = xenbus_read_integer(path);
+
+        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
+        dev->barrier = xenbus_read_integer(path);
+
+        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
+        dev->flush = xenbus_read_integer(path);
+    }
+
+    printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
+    printk("**************************\n");
+
+    return dev;
+}
+
+void shutdown_blkfront(struct blkfront_dev *dev)
+{
+    char* err;
+    char *nodename = dev->nodename;
+
+    char path[strlen(dev->backend) + 1 + 5 + 1];
+
+    blkfront_sync(dev);
+
+    printk("close blk: backend at %s\n", dev->backend);
+
+    snprintf(path, sizeof(path), "%s/state", dev->backend);
+    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
+    xenbus_wait_for_value(path, "5");
+
+    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); /* closed */
+    xenbus_wait_for_value(path, "6");
+
+    unbind_evtchn(dev->local_port);
+
+    free(nodename);
+    free(dev->backend);
+    free(dev);
+}
+
+static void blkfront_wait_slot(struct blkfront_dev *dev)
+{
+    /* Wait for a slot */
+    if (RING_FULL(&dev->ring)) {
+        unsigned long flags;
+        DEFINE_WAIT(w);
+        local_irq_save(flags);
+        while (1) {
+            blkfront_aio_poll(dev);
+            if (!RING_FULL(&dev->ring))
+                break;
+            /* Really no slot, go to sleep. */
+            add_waiter(w, blkfront_queue);
+            local_irq_restore(flags);
+            schedule();
+            local_irq_save(flags);
+        }
+        remove_waiter(w);
+        local_irq_restore(flags);
+    }
+}
+
+/* Issue an aio */
+void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
+{
+    struct blkfront_dev *dev = aiocbp->aio_dev;
+    struct blkif_request *req;
+    RING_IDX i;
+    int notify;
+    int n, j;
+    uintptr_t start, end;
+
+    // Can't do I/O at a non-sector-aligned location
+    ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
+    // Can't do I/O in non-sector-sized amounts
+    ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
+    // Can't do I/O from a non-sector-aligned buffer
+    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));
+
+    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
+    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
+    n = (end - start) / PAGE_SIZE;
+
+    /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB,
+     * so requests bigger than the ring's 44KB (11-segment) limit can't happen */
+    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+    blkfront_wait_slot(dev);
+    i = dev->ring.req_prod_pvt;
+    req = RING_GET_REQUEST(&dev->ring, i);
+
+    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
+    req->nr_segments = n;
+    req->handle = dev->handle;
+    req->id = (uintptr_t) aiocbp;
+    req->sector_number = aiocbp->aio_offset / dev->sector_size;
+
+    for (j = 0; j < n; j++) {
+        uintptr_t data = start + j * PAGE_SIZE;
+        aiocbp->gref[j] = req->seg[j].gref =
+            gnttab_grant_access(0, virt_to_mfn(data), write);
+        req->seg[j].first_sect = 0;
+        req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
+    }
+    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
+    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;
+
+    dev->ring.req_prod_pvt = i + 1;
+
+    wmb();
+    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+
+    if (notify) notify_remote_via_evtchn(dev->evtchn);
+}
+
+void blkfront_aio_write(struct blkfront_aiocb *aiocbp)
+{
+    blkfront_aio(aiocbp, 1);
+}
+
+void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
+{
+    blkfront_aio(aiocbp, 0);
+}
+
+int blkfront_aio_poll(struct blkfront_dev *dev)
+{
+    RING_IDX rp, cons;
+    struct blkif_response *rsp;
+
+moretodo:
+    rp = dev->ring.sring->rsp_prod;
+    rmb(); /* Ensure we see queued responses up to 'rp'. */
+    cons = dev->ring.rsp_cons;
+
+    int nr_consumed = 0;
+    while ((cons != rp))
+    {
+        rsp = RING_GET_RESPONSE(&dev->ring, cons);
+
+        switch (rsp->operation) {
+        case BLKIF_OP_READ:
+        case BLKIF_OP_WRITE:
+        {
+            struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
+            /* Count the pages the buffer spans, exactly as blkfront_aio
+             * did: an unaligned buffer may span one extra page. */
+            int n = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) / PAGE_SIZE) - ((uintptr_t)aiocbp->aio_buf / PAGE_SIZE), j;
+            for (j = 0; j < n; j++)
+                gnttab_end_access(aiocbp->gref[j]);
+
+            /* Note: the callback may free aiocbp itself, so don't touch
+             * it after this call. */
+            aiocbp->aio_cb(aiocbp, rsp->status ? -EIO : 0);
+            break;
+        }
+        case BLKIF_OP_WRITE_BARRIER:
+        case BLKIF_OP_FLUSH_DISKCACHE:
+            break;
+        default:
+            printk("unrecognized block operation %d response\n", rsp->operation);
+            break;
+        }
+
+        nr_consumed++;
+        ++cons;
+    }
+    dev->ring.rsp_cons = cons;
+
+    int more;
+    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
+    if (more) goto moretodo;
+
+    return nr_consumed;
+}
+
+static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
+{
+    int i;
+    struct blkif_request *req;
+    int notify;
+
+    blkfront_wait_slot(dev);
+    i = dev->ring.req_prod_pvt;
+    req = RING_GET_REQUEST(&dev->ring, i);
+    req->operation = op;
+    dev->ring.req_prod_pvt = i + 1;
+    wmb();
+    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+    if (notify) notify_remote_via_evtchn(dev->evtchn);
+}
+
+void blkfront_sync(struct blkfront_dev *dev)
+{
+    unsigned long flags;
+
+    if (dev->barrier == 1)
+        blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);
+
+    if (dev->flush == 1)
+        blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);
+
+    /* Note: This won't finish if another thread enqueues requests.  */
+    local_irq_save(flags);
+    DEFINE_WAIT(w);
+    while (1) {
+        blkfront_aio_poll(dev);
+        if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
+            break;
+
+        add_waiter(w, blkfront_queue);
+        local_irq_restore(flags);
+        schedule();
+        local_irq_save(flags);
+    }
+    remove_waiter(w);
+    local_irq_restore(flags);
+}
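The driver only exposes asynchronous primitives; a blocking read or
write can be layered on top of blkfront_aio() and blkfront_aio_poll()
by the caller. A hedged sketch, not part of this changeset (it relies
on the aiocb's data field being free for the caller's use):

    /* Illustrative blocking wrapper around blkfront_aio(). */
    static void sync_cb(struct blkfront_aiocb *aiocb, int ret)
    {
        /* 0 means pending; 1 means success; 1 + -EIO means failure. */
        *(int*) aiocb->data = 1 + ret;
    }

    int blkfront_rw_sync(struct blkfront_aiocb *aiocbp, int write)
    {
        int status = 0;

        aiocbp->aio_cb = sync_cb;
        aiocbp->data = &status;
        blkfront_aio(aiocbp, write);
        while (status == 0)
            blkfront_aio_poll(aiocbp->aio_dev);
        return status - 1; /* 0 on success, -EIO on I/O error */
    }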
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extras/mini-os/include/blkfront.h	Thu Jan 17 16:41:46 2008 +0000
@@ -0,0 +1,26 @@
+#include <wait.h>
+#include <xen/io/blkif.h>
+#include <types.h>
+struct blkfront_dev;
+struct blkfront_aiocb
+{
+    struct blkfront_dev *aio_dev;
+    uint8_t *aio_buf;
+    size_t aio_nbytes;
+    uint64_t aio_offset;
+    void *data;
+
+    grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+
+    void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret);
+};
+struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode);
+int blkfront_open(struct blkfront_dev *dev);
+void blkfront_aio(struct blkfront_aiocb *aiocbp, int write);
+void blkfront_aio_read(struct blkfront_aiocb *aiocbp);
+void blkfront_aio_write(struct blkfront_aiocb *aiocbp);
+int blkfront_aio_poll(struct blkfront_dev *dev);
+void blkfront_sync(struct blkfront_dev *dev);
+void shutdown_blkfront(struct blkfront_dev *dev);
+
+extern struct wait_queue_head blkfront_queue;
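Callers need not busy-poll: blkfront_handler() wakes blkfront_queue on
every event-channel interrupt, so a consumer can sleep between polls
with the same wait-queue idiom the driver itself uses in
blkfront_wait_slot() and blkfront_sync(). A hedged sketch; "finished"
is an assumed flag set by the caller's completion callback:

    /* Sleep until the completion callback has set *finished. */
    void blkfront_wait_for(struct blkfront_dev *dev, volatile int *finished)
    {
        unsigned long flags;
        DEFINE_WAIT(w);

        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);
            if (*finished)
                break;
            add_waiter(w, blkfront_queue); /* requeue before each sleep */
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }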