ia64/xen-unstable

extras/mini-os/blkfront.c @ 17042:a905c582a406

Add stubdomain support. See stubdom/README for usage details.

- Move PAGE_SIZE and STACK_SIZE into __PAGE_SIZE and __STACK_SIZE in
arch_limits.h, so that they can be obtained from there without pulling
in all the internal Mini-OS defines.
- Set up a xen-elf cross-compilation environment in stubdom/cross-root
- Add a POSIX layer on top of Mini-OS by linking against the newlib C
library and lwIP, and implementing the Unixish part in mini-os/lib/sys.c
- Cross-compile zlib and libpci too.
- Add an xs.h-compatible layer on top of Mini-OS' xenbus.
- Cross-compile libxc with an additional xc_minios.c and a few things
disabled.
- Cross-compile ioemu with an additional block-vbd, but without sound,
TPM and other extras. A few hacks are needed:
  - Align IDE and SCSI buffers on at least the sector size to permit
direct transmission to the block backend. While we are at it, just
page-align them, which may save a segment. Also, limit the SCSI
buffer size because of limitations of the block paravirtualization
protocol.
  - Allocate big tables dynamically rather than letting them go to
bss: when Mini-OS gets installed in memory, bss is not lazily
allocated, and making it so would be unnecessarily tricky when we can
simply use malloc (see the sketch after this list).
- Change the Mini-OS build somewhat, so as to export the Mini-OS
compilation flags to the Makefiles of libxc and ioemu.
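
As a minimal illustration of the bss point above (a sketch with
hypothetical names, not code from this change), moving a large table
from static storage to a malloc'd allocation looks like this:

    #include <stdlib.h>

    struct entry { int val; };          /* hypothetical payload */

    /* Before: 64K entries land in bss, which Mini-OS backs eagerly
     * when installed in memory: */
    /* static struct entry big_table[65536]; */

    /* After: allocate the table at startup instead. */
    static struct entry *big_table;

    static int init_big_table(void)
    {
        big_table = malloc(65536 * sizeof(*big_table));
        return big_table ? 0 : -1;
    }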

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
author   Keir Fraser <keir.fraser@citrix.com>
date     Tue Feb 12 14:35:39 2008 +0000
parents  0672d5b11262
children c1003b9e0bb8
/* Minimal block driver for Mini-OS.
 * Copyright (c) 2007-2008 Samuel Thibault.
 * Based on netfront.c.
 */

#include <os.h>
#include <xenbus.h>
#include <events.h>
#include <errno.h>
#include <xen/io/blkif.h>
#include <gnttab.h>
#include <xmalloc.h>
#include <time.h>
#include <blkfront.h>
#include <lib.h>
#include <fcntl.h>

#ifndef HAVE_LIBC
#define strtoul simple_strtoul
#endif

/* Note: we generally don't need to disable IRQs since we hardly do anything in
 * the interrupt handler. */

/* Note: we really suppose non-preemptive threads. */

DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);

#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
#define GRANT_INVALID_REF 0

struct blk_buffer {
    void* page;
    grant_ref_t gref;
};

struct blkfront_dev {
    domid_t dom;

    struct blkif_front_ring ring;
    grant_ref_t ring_ref;
    evtchn_port_t evtchn, local_port;
    blkif_vdev_t handle;

    char *nodename;
    char *backend;
    unsigned sector_size;
    unsigned sectors;
    int mode;
    int barrier;
    int flush;

#ifdef HAVE_LIBC
    int fd;
#endif
};

static inline int xenblk_rxidx(RING_IDX idx)
{
    return idx & (BLK_RING_SIZE - 1);
}

void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
{
#ifdef HAVE_LIBC
    struct blkfront_dev *dev = data;
    int fd = dev->fd;

    files[fd].read = 1;
#endif
    wake_up(&blkfront_queue);
}

struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode)
{
    xenbus_transaction_t xbt;
    char *err;
    char *message = NULL;
    struct blkif_sring *s;
    int retry = 0;
    char *msg;
    char *c;

    struct blkfront_dev *dev;

    if (!nodename)
        nodename = "device/vbd/768";

    char path[strlen(nodename) + 1 + 10 + 1];

    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);

    dev = malloc(sizeof(*dev));
    dev->nodename = strdup(nodename);

    /* Allocate an unbound event channel and bind our handler to it. */
    evtchn_alloc_unbound_t op;
    op.dom = DOMID_SELF;
    snprintf(path, sizeof(path), "%s/backend-id", nodename);
    dev->dom = op.remote_dom = xenbus_read_integer(path);
    HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
    clear_evtchn(op.port); /* Without this, the handler gets invoked right away! */
    dev->local_port = bind_evtchn(op.port, blkfront_handler, dev);
    dev->evtchn = op.port;

    /* Set up the shared ring and grant the backend access to it. */
    s = (struct blkif_sring*) alloc_page();
    memset(s, 0, PAGE_SIZE);

    SHARED_RING_INIT(s);
    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);

    dev->ring_ref = gnttab_grant_access(dev->dom, virt_to_mfn(s), 0);

    // FIXME: proper frees on failures
again:
    err = xenbus_transaction_start(&xbt);
    if (err) {
        printk("error starting transaction\n");
    }

    err = xenbus_printf(xbt, nodename, "ring-ref", "%u", dev->ring_ref);
    if (err) {
        message = "writing ring-ref";
        goto abort_transaction;
    }
    err = xenbus_printf(xbt, nodename, "event-channel", "%u", dev->evtchn);
    if (err) {
        message = "writing event-channel";
        goto abort_transaction;
    }

    err = xenbus_printf(xbt, nodename, "state", "%u", 4); /* connected */
    if (err) {
        message = "writing state";
        goto abort_transaction;
    }

    err = xenbus_transaction_end(xbt, 0, &retry);
    if (retry)
        goto again;
    if (err)
        printk("error completing transaction\n");

    goto done;

abort_transaction:
    printk("Abort transaction: %s\n", message);
    xenbus_transaction_end(xbt, 1, &retry);
    return NULL;

done:

    snprintf(path, sizeof(path), "%s/backend", nodename);
    msg = xenbus_read(XBT_NIL, path, &dev->backend);
    if (msg) {
        printk("Error %s when reading the backend path %s\n", msg, path);
        return NULL;
    }

    printk("backend at %s\n", dev->backend);

    dev->handle = strtoul(strrchr(nodename, '/') + 1, NULL, 0);

    {
        char path[strlen(dev->backend) + 1 + 19 + 1];
        snprintf(path, sizeof(path), "%s/mode", dev->backend);
        msg = xenbus_read(XBT_NIL, path, &c);
        if (msg) {
            printk("Error %s when reading the mode\n", msg);
            return NULL;
        }
        if (*c == 'w')
            *mode = dev->mode = O_RDWR;
        else
            *mode = dev->mode = O_RDONLY;
        free(c);

        /* Wait for the backend to connect (state 4 = XenbusStateConnected). */
        snprintf(path, sizeof(path), "%s/state", dev->backend);
        xenbus_watch_path(XBT_NIL, path);
        xenbus_wait_for_value(path, "4");
        xenbus_unwatch_path(XBT_NIL, path);

        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
        // FIXME: read_integer returns an int, so disk size limited to 1TB for now
        *sectors = dev->sectors = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
        *sector_size = dev->sector_size = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
        dev->barrier = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
        dev->flush = xenbus_read_integer(path);
    }

    printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
    printk("**************************\n");

    return dev;
}
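
/* Example (not part of the original file): a minimal sketch of how a caller
 * brings the device up. Passing NULL picks the default xenbus node
 * "device/vbd/768" as above; error handling is elided, and sectors is
 * printed as unsigned long, which may truncate on 32-bit builds. */
static struct blkfront_dev *example_open_disk(void)
{
    uint64_t sectors;
    unsigned sector_size;
    int mode;
    struct blkfront_dev *dev;

    dev = init_blkfront(NULL, &sectors, &sector_size, &mode);
    if (!dev) {
        printk("blkfront init failed\n");
        return NULL;
    }
    printk("disk: %lu sectors of %u bytes, %s\n",
           (unsigned long) sectors, sector_size,
           mode == O_RDWR ? "read-write" : "read-only");
    return dev;
}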
void shutdown_blkfront(struct blkfront_dev *dev)
{
    char *err;
    char *nodename = dev->nodename;

    char path[strlen(dev->backend) + 1 + 5 + 1];

    blkfront_sync(dev);

    printk("close blk: backend at %s\n", dev->backend);

    snprintf(path, sizeof(path), "%s/state", dev->backend);
    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
    xenbus_wait_for_value(path, "5");

    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); /* closed */
    xenbus_wait_for_value(path, "6");

    unbind_evtchn(dev->local_port);

    free(nodename);
    free(dev->backend);
    free(dev);
}

static void blkfront_wait_slot(struct blkfront_dev *dev)
{
    /* Wait for a slot */
    if (RING_FULL(&dev->ring)) {
        unsigned long flags;
        DEFINE_WAIT(w);
        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);
            if (!RING_FULL(&dev->ring))
                break;
            /* Really no slot, go to sleep. */
            add_waiter(w, blkfront_queue);
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }
}
/* Issue an aio */
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    struct blkif_request *req;
    RING_IDX i;
    int notify;
    int n, j;
    uintptr_t start, end;

    // Can't io at non-sector-aligned location
    ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
    // Can't io non-sector-sized amounts
    ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
    // Can't io non-sector-aligned buffer
    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));

    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
    aiocbp->n = n = (end - start) / PAGE_SIZE;

    /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB,
     * so max 44KB can't happen */
    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);

    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
    req->nr_segments = n;
    req->handle = dev->handle;
    req->id = (uintptr_t) aiocbp;
    req->sector_number = aiocbp->aio_offset / dev->sector_size;

    for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
        if (!write) {
            /* Trigger CoW if needed */
            *(char*)data = 0;
            barrier();
        }
        aiocbp->gref[j] = req->seg[j].gref =
            gnttab_grant_access(dev->dom, virtual_to_mfn(data), write);
        req->seg[j].first_sect = 0;
        req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
    }
    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;

    dev->ring.req_prod_pvt = i + 1;

    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);

    if (notify) notify_remote_via_evtchn(dev->evtchn);
}

void blkfront_aio_write(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 1);
}

void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 0);
}
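
/* Example (not part of the original file): issuing a single asynchronous
 * read and busy-polling for completion. A sketch: the buffer comes from
 * alloc_page() so it satisfies the alignment asserts above, completion is
 * signalled through a flag set by the callback, and the page is
 * intentionally not freed to keep the sketch short. */
static volatile int example_read_done;

static void example_read_cb(struct blkfront_aiocb *aiocbp, int ret)
{
    if (ret)
        printk("example read failed: %d\n", ret);
    example_read_done = 1;
}

static void example_read_first_page(struct blkfront_dev *dev)
{
    struct blkfront_aiocb aiocb;

    memset(&aiocb, 0, sizeof(aiocb));
    aiocb.aio_dev = dev;
    aiocb.aio_buf = (uint8_t*) alloc_page(); /* page- and sector-aligned */
    aiocb.aio_nbytes = PAGE_SIZE;            /* a multiple of sector_size */
    aiocb.aio_offset = 0;
    aiocb.aio_cb = example_read_cb;

    example_read_done = 0;
    blkfront_aio_read(&aiocb);
    while (!example_read_done)
        blkfront_aio_poll(dev);              /* consume ring responses */
}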
int blkfront_aio_poll(struct blkfront_dev *dev)
{
    RING_IDX rp, cons;
    struct blkif_response *rsp;

moretodo:
    rp = dev->ring.sring->rsp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */
    cons = dev->ring.rsp_cons;

    int nr_consumed = 0;
    while (cons != rp)
    {
        rsp = RING_GET_RESPONSE(&dev->ring, cons);

        if (rsp->status != BLKIF_RSP_OKAY)
            printk("block error %d for op %d\n", rsp->status, rsp->operation);

        switch (rsp->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        {
            struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
            int j;

            for (j = 0; j < aiocbp->n; j++)
                gnttab_end_access(aiocbp->gref[j]);

            /* Note: the callback frees aiocbp itself */
            aiocbp->aio_cb(aiocbp, rsp->status ? -EIO : 0);
            break;
        }
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
            break;
        default:
            printk("unrecognized block operation %d response\n", rsp->operation);
            break;
        }

        nr_consumed++;
        ++cons;
    }
    dev->ring.rsp_cons = cons;

    int more;
    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
    if (more) goto moretodo;

    return nr_consumed;
}
static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
{
    int i;
    struct blkif_request *req;
    int notify;

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);
    req->operation = op;
    req->nr_segments = 0;
    req->handle = dev->handle;
    /* Not used */
    req->id = 0;
    /* Not needed anyway, but the backend will check it */
    req->sector_number = 0;
    dev->ring.req_prod_pvt = i + 1;
    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
    if (notify) notify_remote_via_evtchn(dev->evtchn);
}

void blkfront_sync(struct blkfront_dev *dev)
{
    unsigned long flags;

    if (dev->mode == O_RDWR) {
        if (dev->barrier == 1)
            blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);

        if (dev->flush == 1)
            blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);
    }

    /* Note: This won't finish if another thread enqueues requests. */
    local_irq_save(flags);
    DEFINE_WAIT(w);
    while (1) {
        blkfront_aio_poll(dev);
        if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}
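
/* Example (not part of the original file): making a write durable. A sketch
 * of the ordering only: once the write's callback has fired, blkfront_sync()
 * pushes a barrier or flush (when the backend advertises one) and drains the
 * ring. */
static void example_write_durably(struct blkfront_dev *dev,
                                  struct blkfront_aiocb *aiocbp)
{
    blkfront_aio_write(aiocbp);
    /* ... wait for aiocbp->aio_cb to fire, as in the read example above ... */
    blkfront_sync(dev);
}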
#ifdef HAVE_LIBC
int blkfront_open(struct blkfront_dev *dev)
{
    dev->fd = alloc_fd(FTYPE_BLK);
    printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd);
    files[dev->fd].blk.dev = dev;
    return dev->fd;
}
#endif
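
/* Example (not part of the original file): under HAVE_LIBC, the fd returned
 * by blkfront_open() is wired into Mini-OS's POSIX layer (mini-os/lib/sys.c,
 * per the commit message), so it can be read like an ordinary file. A sketch,
 * assuming that wiring and newlib's read(). */
#ifdef HAVE_LIBC
static void example_posix_read(struct blkfront_dev *dev)
{
    char buf[512];
    int fd = blkfront_open(dev);

    if (read(fd, buf, sizeof(buf)) < 0)
        printk("read error\n");
}
#endif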