ia64/xen-unstable
extras/mini-os/blkfront.c @ 16828:623d668b3029

minios, blkfront: set default block path.
Remove unnecessary path head stripping.

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
Author:   Keir Fraser <keir.fraser@citrix.com>
Date:     Tue Jan 22 09:46:15 2008 +0000
Parents:  764d6741de07
Children: 898ce9341e6b
/* Minimal block driver for Mini-OS.
 * Copyright (c) 2007-2008 Samuel Thibault.
 * Based on netfront.c.
 */

#include <os.h>
#include <xenbus.h>
#include <events.h>
#include <errno.h>
#include <xen/io/blkif.h>
#include <gnttab.h>
#include <xmalloc.h>
#include <time.h>
#include <blkfront.h>
#include <lib.h>
#include <fcntl.h>

/* Note: we generally don't need to disable IRQs since we hardly do anything in
 * the interrupt handler. */

/* Note: this code assumes non-preemptive (cooperative) threads. */

DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);

/* Number of request slots in a one-page ring. */
#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
#define GRANT_INVALID_REF 0
struct blk_buffer {
    void* page;
    grant_ref_t gref;
};

struct blkfront_dev {
    struct blkif_front_ring ring;
    grant_ref_t ring_ref;
    evtchn_port_t evtchn, local_port;
    blkif_vdev_t handle;

    char *nodename;
    char *backend;
    unsigned sector_size;
    unsigned sectors;
    int mode;        /* O_RDONLY or O_RDWR, from the backend's "mode" node */
    int barrier;     /* backend advertises feature-barrier */
    int flush;       /* backend advertises feature-flush-cache */
};
static inline int xenblk_rxidx(RING_IDX idx)
{
    return idx & (BLK_RING_SIZE - 1);
}
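
/* The event-channel handler below only wakes up sleepers; actual response
 * processing happens in blkfront_aio_poll(), called from thread context.
 * This is what keeps the handler short enough that IRQs rarely need to be
 * disabled elsewhere (see the note at the top of this file). */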
void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
{
    wake_up(&blkfront_queue);
}
struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode)
{
    xenbus_transaction_t xbt;
    char* err;
    char* message=NULL;
    struct blkif_sring *s;
    int retry=0;
    char* msg;
    char* c;

    struct blkfront_dev *dev;

    if (!nodename)
        nodename = "device/vbd/768";

    char path[strlen(nodename) + 1 + 10 + 1];

    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);

    dev = malloc(sizeof(*dev));
    dev->nodename = strdup(nodename);

    s = (struct blkif_sring*) alloc_page();
    memset(s, 0, PAGE_SIZE);

    SHARED_RING_INIT(s);
    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);

    dev->ring_ref = gnttab_grant_access(0, virt_to_mfn(s), 0);

    evtchn_alloc_unbound_t op;
    op.dom = DOMID_SELF;
    snprintf(path, sizeof(path), "%s/backend-id", nodename);
    op.remote_dom = xenbus_read_integer(path);
    HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
    clear_evtchn(op.port); /* Otherwise the handler would fire right away. */
    dev->local_port = bind_evtchn(op.port, blkfront_handler, dev);
    dev->evtchn = op.port;

    // FIXME: proper frees on failures
again:
    err = xenbus_transaction_start(&xbt);
    if (err) {
        printk("error starting transaction: %s\n", err);
    }

    err = xenbus_printf(xbt, nodename, "ring-ref", "%u",
                dev->ring_ref);
    if (err) {
        message = "writing ring-ref";
        goto abort_transaction;
    }
    err = xenbus_printf(xbt, nodename,
                "event-channel", "%u", dev->evtchn);
    if (err) {
        message = "writing event-channel";
        goto abort_transaction;
    }

    err = xenbus_printf(xbt, nodename, "state", "%u",
            4); /* connected */
    if (err) {
        message = "writing state";
        goto abort_transaction;
    }

    err = xenbus_transaction_end(xbt, 0, &retry);
    if (retry) {
        printk("completing transaction\n");
        goto again;
    }

    goto done;

abort_transaction:
    xenbus_transaction_end(xbt, 1, &retry);
    return NULL;

done:

    snprintf(path, sizeof(path), "%s/backend", nodename);
    msg = xenbus_read(XBT_NIL, path, &dev->backend);
    if (msg) {
        printk("Error %s when reading the backend path %s\n", msg, path);
        return NULL;
    }

    printk("backend at %s\n", dev->backend);

    dev->handle = simple_strtoul(strrchr(nodename, '/')+1, NULL, 0);

    {
        char path[strlen(dev->backend) + 1 + 19 + 1];
        snprintf(path, sizeof(path), "%s/mode", dev->backend);
        msg = xenbus_read(XBT_NIL, path, &c);
        if (msg) {
            printk("Error %s when reading the mode\n", msg);
            return NULL;
        }
        if (*c == 'w')
            *mode = dev->mode = O_RDWR;
        else
            *mode = dev->mode = O_RDONLY;
        free(c);

        snprintf(path, sizeof(path), "%s/state", dev->backend);

        xenbus_watch_path(XBT_NIL, path);

        /* Wait for the backend to reach state 4 (connected). */
        xenbus_wait_for_value(path, "4");

        xenbus_unwatch_path(XBT_NIL, path);

        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
        // FIXME: read_integer returns an int, so disk size limited to 1TB for now
        *sectors = dev->sectors = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
        *sector_size = dev->sector_size = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
        dev->barrier = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
        dev->flush = xenbus_read_integer(path);
    }

    printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
    printk("**************************\n");

    return dev;
}
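
#if 0
/* Illustrative sketch only, not part of the driver: how a Mini-OS
 * application might bring up the device. example_open is a hypothetical
 * name; real callers pass the xenstore path of their virtual block device,
 * or NULL to get the "device/vbd/768" default above. */
static struct blkfront_dev *example_open(void)
{
    uint64_t sectors;
    unsigned sector_size;
    int mode;
    struct blkfront_dev *dev;

    dev = init_blkfront(NULL, &sectors, &sector_size, &mode);
    if (!dev)
        return NULL;
    printk("disk: %llu sectors of %u bytes, %s\n",
           (unsigned long long) sectors, sector_size,
           mode == O_RDWR ? "read-write" : "read-only");
    return dev;
}
#endif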
void shutdown_blkfront(struct blkfront_dev *dev)
{
    char* err;
    char *nodename = dev->nodename;

    char path[strlen(dev->backend) + 1 + 5 + 1];

    blkfront_sync(dev);

    printk("close blk: backend at %s\n", dev->backend);

    snprintf(path, sizeof(path), "%s/state", dev->backend);
    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
    xenbus_wait_for_value(path, "5");

    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); /* closed */
    xenbus_wait_for_value(path, "6");

    unbind_evtchn(dev->local_port);

    free(nodename);
    free(dev->backend);
    free(dev);
}
static void blkfront_wait_slot(struct blkfront_dev *dev)
{
    /* Wait for a slot */
    if (RING_FULL(&dev->ring)) {
        unsigned long flags;
        DEFINE_WAIT(w);
        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);
            if (!RING_FULL(&dev->ring))
                break;
            /* Really no slot, go to sleep. */
            add_waiter(w, blkfront_queue);
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }
}
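
/* Why IRQs are disabled around the loop above: blkfront_handler() runs from
 * interrupt context and wakes blkfront_queue. The ring-full check and
 * add_waiter() happen with IRQs off, so no completion can slip in unnoticed
 * between poll and sleep; a wakeup arriving after local_irq_restore() but
 * before schedule() simply marks the thread runnable again, so schedule()
 * returns promptly instead of losing the event. */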
/* Issue an aio */
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    struct blkif_request *req;
    RING_IDX i;
    int notify;
    int n, j;
    uintptr_t start, end;

    // Can't do I/O at a non-sector-aligned location
    ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
    // Can't do I/O in non-sector-sized amounts
    ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
    // Can't do I/O from a non-sector-aligned buffer
    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));

    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
    n = (end - start) / PAGE_SIZE;

    /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB,
     * so requests of more than 44KB (BLKIF_MAX_SEGMENTS_PER_REQUEST pages)
     * cannot happen. */
    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);

    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
    req->nr_segments = n;
    req->handle = dev->handle;
    req->id = (uintptr_t) aiocbp;
    req->sector_number = aiocbp->aio_offset / dev->sector_size;

    for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
        aiocbp->gref[j] = req->seg[j].gref =
            gnttab_grant_access(0, virt_to_mfn(data), write);
        req->seg[j].first_sect = 0;
        req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
    }
    /* The first and last pages may be only partially covered by the buffer. */
    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;

    dev->ring.req_prod_pvt = i + 1;

    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);

    if (notify) notify_remote_via_evtchn(dev->evtchn);
}
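
/* Worked example of the segment arithmetic above, assuming PAGE_SIZE == 4096
 * and sector_size == 512: a 2048-byte read into a buffer at offset 0x200
 * within a page spans a single page (n == 1), so the request carries one
 * segment with first_sect == 0x200/512 == 1 and
 * last_sect == (0x200 + 2048 - 1)/512 == 4, i.e. sectors 1..4 of that page. */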
void blkfront_aio_write(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 1);
}

void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 0);
}
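
#if 0
/* Illustrative sketch only, not part of the driver: building a blocking
 * read on top of the aio interface. The example_* names are hypothetical;
 * the aiocb fields used are exactly those blkfront_aio() and
 * blkfront_aio_poll() touch above. The wrapper struct lets the completion
 * callback find its flag from the aiocb pointer alone. */
struct example_req {
    struct blkfront_aiocb aiocb;   /* must be the first member */
    volatile int done;             /* 0 pending, 1 ok, <0 error */
};

static void example_read_cb(struct blkfront_aiocb *aiocb, int ret)
{
    struct example_req *r = (struct example_req *) aiocb;
    r->done = ret ? ret : 1;
}

static int example_read_sector(struct blkfront_dev *dev, void *buf,
                               uint64_t sector, unsigned sector_size)
{
    struct example_req r;

    memset(&r, 0, sizeof(r));
    r.aiocb.aio_dev = dev;
    r.aiocb.aio_buf = buf;                 /* must be sector-aligned */
    r.aiocb.aio_nbytes = sector_size;
    r.aiocb.aio_offset = sector * sector_size;
    r.aiocb.aio_cb = example_read_cb;

    blkfront_aio_read(&r.aiocb);
    while (!r.done)
        blkfront_aio_poll(dev);            /* poll until our callback has run */
    return r.done < 0 ? r.done : 0;
}
#endif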
int blkfront_aio_poll(struct blkfront_dev *dev)
{
    RING_IDX rp, cons;
    struct blkif_response *rsp;

moretodo:
    rp = dev->ring.sring->rsp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */
    cons = dev->ring.rsp_cons;

    int nr_consumed = 0;
    while (cons != rp)
    {
        rsp = RING_GET_RESPONSE(&dev->ring, cons);

        switch (rsp->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        {
            struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
            /* Release exactly the grants taken in blkfront_aio(): recompute
             * the segment count the same way, since a buffer that is not
             * page-aligned may span one more page than
             * aio_nbytes / PAGE_SIZE suggests. */
            uintptr_t start = (uintptr_t) aiocbp->aio_buf & PAGE_MASK;
            uintptr_t end = ((uintptr_t) aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
            int n = (end - start) / PAGE_SIZE, j;
            for (j = 0; j < n; j++)
                gnttab_end_access(aiocbp->gref[j]);

            /* Note: the callback is expected to free aiocbp itself. */
            aiocbp->aio_cb(aiocbp, rsp->status ? -EIO : 0);
            break;
        }
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
            break;
        default:
            printk("unrecognized block operation %d response\n", rsp->operation);
            break;
        }

        nr_consumed++;
        ++cons;
    }
    dev->ring.rsp_cons = cons;

    int more;
    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
    if (more) goto moretodo;

    return nr_consumed;
}
static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
{
    int i;
    struct blkif_request *req;
    int notify;

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);
    req->operation = op;
    /* Barrier/flush requests carry no data: make that explicit rather than
     * leaving these fields uninitialized. */
    req->nr_segments = 0;
    req->handle = dev->handle;
    req->id = 0;
    dev->ring.req_prod_pvt = i + 1;
    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
    if (notify) notify_remote_via_evtchn(dev->evtchn);
}
void blkfront_sync(struct blkfront_dev *dev)
{
    unsigned long flags;

    if (dev->barrier == 1)
        blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);

    if (dev->flush == 1)
        blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);

    /* Note: This won't finish if another thread enqueues requests. */
    local_irq_save(flags);
    DEFINE_WAIT(w);
    while (1) {
        blkfront_aio_poll(dev);
        if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}
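
/* Typical caller sequence, as a sketch: queue writes with
 * blkfront_aio_write(), then call blkfront_sync() to push a barrier or a
 * cache flush (when the backend advertised feature-barrier or
 * feature-flush-cache at init time) and wait for the ring to drain, and
 * finally shutdown_blkfront() to close the connection; shutdown itself
 * calls blkfront_sync() first. */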