
view extras/mini-os/blkfront.c @ 19557:226ef307cd2e

AMD IOMMU: Fix ioapic interrupt remapping

A few IOAPIC redirection entries are initialized by the hypervisor before
the IOMMU hardware is enabled. This patch copies those entries from the
IOAPIC redirection table into the interrupt remapping table after the
interrupt remapping table has been allocated.

Signed-off-by: Wei Wang <wei.wang2@amd.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Apr 17 13:16:39 2009 +0100 (2009-04-17)
/* Minimal block driver for Mini-OS.
 * Copyright (c) 2007-2008 Samuel Thibault.
 * Based on netfront.c.
 */

#include <os.h>
#include <xenbus.h>
#include <events.h>
#include <errno.h>
#include <xen/io/blkif.h>
#include <xen/io/protocols.h>
#include <gnttab.h>
#include <xmalloc.h>
#include <time.h>
#include <blkfront.h>
#include <lib.h>
#include <fcntl.h>

#ifndef HAVE_LIBC
#define strtoul simple_strtoul
#endif

/* Note: we generally don't need to disable IRQs since we hardly do anything in
 * the interrupt handler. */

/* Note: we really assume non-preemptive threads. */

DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);

#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
#define GRANT_INVALID_REF 0
struct blk_buffer {
    void* page;
    grant_ref_t gref;
};

struct blkfront_dev {
    domid_t dom;

    struct blkif_front_ring ring;
    grant_ref_t ring_ref;
    evtchn_port_t evtchn;
    blkif_vdev_t handle;

    char *nodename;
    char *backend;
    struct blkfront_info info;

    xenbus_event_queue events;

#ifdef HAVE_LIBC
    int fd;
#endif
};
void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
{
#ifdef HAVE_LIBC
    struct blkfront_dev *dev = data;
    int fd = dev->fd;

    if (fd != -1)
        files[fd].read = 1;
#endif
    wake_up(&blkfront_queue);
}

static void free_blkfront(struct blkfront_dev *dev)
{
    mask_evtchn(dev->evtchn);

    free(dev->backend);

    gnttab_end_access(dev->ring_ref);
    free_page(dev->ring.sring);

    unbind_evtchn(dev->evtchn);

    free(dev->nodename);
    free(dev);
}
struct blkfront_dev *init_blkfront(char *_nodename, struct blkfront_info *info)
{
    xenbus_transaction_t xbt;
    char* err;
    char* message=NULL;
    struct blkif_sring *s;
    int retry=0;
    char* msg;
    char* c;
    char* nodename = _nodename ? _nodename : "device/vbd/768";

    struct blkfront_dev *dev;

    char path[strlen(nodename) + 1 + 10 + 1];

    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);

    dev = malloc(sizeof(*dev));
    memset(dev, 0, sizeof(*dev));
    dev->nodename = strdup(nodename);
#ifdef HAVE_LIBC
    dev->fd = -1;
#endif

    snprintf(path, sizeof(path), "%s/backend-id", nodename);
    dev->dom = xenbus_read_integer(path);
    evtchn_alloc_unbound(dev->dom, blkfront_handler, dev, &dev->evtchn);

    s = (struct blkif_sring*) alloc_page();
    memset(s, 0, PAGE_SIZE);

    SHARED_RING_INIT(s);
    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);

    dev->ring_ref = gnttab_grant_access(dev->dom, virt_to_mfn(s), 0);

    dev->events = NULL;
again:
    err = xenbus_transaction_start(&xbt);
    if (err) {
        printk("error starting transaction: %s\n", err);
    }

    err = xenbus_printf(xbt, nodename, "ring-ref", "%u",
                dev->ring_ref);
    if (err) {
        message = "writing ring-ref";
        goto abort_transaction;
    }
    err = xenbus_printf(xbt, nodename,
                "event-channel", "%u", dev->evtchn);
    if (err) {
        message = "writing event-channel";
        goto abort_transaction;
    }
    err = xenbus_printf(xbt, nodename,
                "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
    if (err) {
        message = "writing protocol";
        goto abort_transaction;
    }

    err = xenbus_printf(xbt, nodename, "state", "%u",
                4); /* connected */

    err = xenbus_transaction_end(xbt, 0, &retry);
    if (retry) {
        printk("completing transaction\n");
        goto again;
    }

    goto done;

abort_transaction:
    xenbus_transaction_end(xbt, 1, &retry);
    printk("Abort transaction: %s\n", message);
    goto error;

done:
    snprintf(path, sizeof(path), "%s/backend", nodename);
    msg = xenbus_read(XBT_NIL, path, &dev->backend);
    if (msg) {
        printk("Error %s when reading the backend path %s\n", msg, path);
        goto error;
    }

    printk("backend at %s\n", dev->backend);

    dev->handle = strtoul(strrchr(nodename, '/')+1, NULL, 0);

    {
        char path[strlen(dev->backend) + 1 + 19 + 1];

        snprintf(path, sizeof(path), "%s/mode", dev->backend);
        msg = xenbus_read(XBT_NIL, path, &c);
        if (msg) {
            printk("Error %s when reading the mode\n", msg);
            goto error;
        }
        if (*c == 'w')
            dev->info.mode = O_RDWR;
        else
            dev->info.mode = O_RDONLY;
        free(c);

        snprintf(path, sizeof(path), "%s/state", dev->backend);

        xenbus_watch_path_token(XBT_NIL, path, path, &dev->events);

        xenbus_wait_for_value(path, "4", &dev->events); /* wait for the backend to connect */

        snprintf(path, sizeof(path), "%s/info", dev->backend);
        dev->info.info = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
        // FIXME: read_integer returns an int, so disk size is limited to 1TB for now
        dev->info.sectors = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
        dev->info.sector_size = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
        dev->info.barrier = xenbus_read_integer(path);

        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
        dev->info.flush = xenbus_read_integer(path);

        *info = dev->info;
    }
    unmask_evtchn(dev->evtchn);

    printk("%u sectors of %u bytes\n", dev->info.sectors, dev->info.sector_size);
    printk("**************************\n");

    return dev;

error:
    free_blkfront(dev);
    return NULL;
}
void shutdown_blkfront(struct blkfront_dev *dev)
{
    char* err;
    char *nodename = dev->nodename;

    char path[strlen(dev->backend) + 1 + 5 + 1];

    blkfront_sync(dev);

    printk("close blk: backend at %s\n", dev->backend);

    snprintf(path, sizeof(path), "%s/state", dev->backend);
    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
    xenbus_wait_for_value(path, "5", &dev->events);

    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); /* closed */
    xenbus_wait_for_value(path, "6", &dev->events);

    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 1); /* initialising */
    xenbus_wait_for_value(path, "2", &dev->events); /* backend back in InitWait */

    xenbus_unwatch_path(XBT_NIL, path);

    snprintf(path, sizeof(path), "%s/ring-ref", nodename);
    xenbus_rm(XBT_NIL, path);
    snprintf(path, sizeof(path), "%s/event-channel", nodename);
    xenbus_rm(XBT_NIL, path);

    free_blkfront(dev);
}
static void blkfront_wait_slot(struct blkfront_dev *dev)
{
    /* Wait for a slot */
    if (RING_FULL(&dev->ring)) {
        unsigned long flags;
        DEFINE_WAIT(w);
        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);
            if (!RING_FULL(&dev->ring))
                break;
            /* Really no slot, go to sleep. */
            add_waiter(w, blkfront_queue);
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }
}
/* Issue an aio */
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    struct blkif_request *req;
    RING_IDX i;
    int notify;
    int n, j;
    uintptr_t start, end;

    // Can't do I/O at a non-sector-aligned location
    ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1)));
    // Can't do I/O in non-sector-sized amounts
    ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1)));
    // Can't do I/O to a non-sector-aligned buffer
    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1)));

    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
    aiocbp->n = n = (end - start) / PAGE_SIZE;

    /* qemu's IDE max multsect is 16 (8KB) and its SCSI max DMA was set to 32KB,
     * so the 44KB maximum (BLKIF_MAX_SEGMENTS_PER_REQUEST pages) can't be exceeded */
    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);

    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
    req->nr_segments = n;
    req->handle = dev->handle;
    req->id = (uintptr_t) aiocbp;
    req->sector_number = aiocbp->aio_offset / dev->info.sector_size;

    for (j = 0; j < n; j++) {
        req->seg[j].first_sect = 0;
        req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1;
    }
    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size;
    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size;
    for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
        if (!write) {
            /* Trigger CoW if needed */
            *(char*)(data + (req->seg[j].first_sect << 9)) = 0;
            barrier();
        }
        aiocbp->gref[j] = req->seg[j].gref =
            gnttab_grant_access(dev->dom, virtual_to_mfn(data), write);
    }

    dev->ring.req_prod_pvt = i + 1;

    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
    if (notify) notify_remote_via_evtchn(dev->evtchn);
}
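
/*
 * Illustrative sketch (not part of the driver): how a caller can drive
 * blkfront_aio() directly instead of going through the synchronous
 * blkfront_io() wrapper below.  my_read_done() and example_async_read() are
 * hypothetical names; the aiocb fields and the alignment requirements are the
 * ones checked above, and completions are only delivered when somebody calls
 * blkfront_aio_poll() on the device.
 */
#if 0
static void my_read_done(struct blkfront_aiocb *aiocbp, int ret)
{
    if (ret)
        printk("read failed: %d\n", ret);
    /* aiocbp->aio_buf now holds the data; buffer and aiocb may be reused. */
}

static void example_async_read(struct blkfront_dev *dev, uint64_t offset)
{
    /* Must stay valid until the callback has run. */
    static struct blkfront_aiocb aiocb;

    memset(&aiocb, 0, sizeof(aiocb));
    aiocb.aio_dev = dev;
    aiocb.aio_buf = (void*) alloc_page();  /* page-aligned, hence sector-aligned */
    aiocb.aio_nbytes = PAGE_SIZE;          /* a multiple of the sector size */
    aiocb.aio_offset = offset;             /* byte offset, must be sector-aligned */
    aiocb.aio_cb = my_read_done;

    blkfront_aio(&aiocb, 0);               /* 0 = read, 1 = write */

    /* ... later, e.g. from the thread waiting on blkfront_queue: */
    blkfront_aio_poll(dev);
}
#endif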
static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret)
{
    aiocbp->data = (void*) 1;
}

void blkfront_io(struct blkfront_aiocb *aiocbp, int write)
{
    unsigned long flags;
    DEFINE_WAIT(w);

    ASSERT(!aiocbp->aio_cb);
    aiocbp->aio_cb = blkfront_aio_cb;
    blkfront_aio(aiocbp, write);
    aiocbp->data = NULL;

    local_irq_save(flags);
    while (1) {
        blkfront_aio_poll(aiocbp->aio_dev);
        if (aiocbp->data)
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}
static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id)
{
    int i;
    struct blkif_request *req;
    int notify;

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);
    req->operation = op;
    req->nr_segments = 0;
    req->handle = dev->handle;
    req->id = id;
    /* Not needed anyway, but the backend will check it */
    req->sector_number = 0;
    dev->ring.req_prod_pvt = i + 1;
    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
    if (notify) notify_remote_via_evtchn(dev->evtchn);
}

void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    blkfront_push_operation(dev, op, (uintptr_t) aiocbp);
}
void blkfront_sync(struct blkfront_dev *dev)
{
    unsigned long flags;
    DEFINE_WAIT(w);

    if (dev->info.mode == O_RDWR) {
        if (dev->info.barrier == 1)
            blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0);

        if (dev->info.flush == 1)
            blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0);
    }

    /* Note: This won't finish if another thread enqueues requests. */
    local_irq_save(flags);
    while (1) {
        blkfront_aio_poll(dev);
        if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}
int blkfront_aio_poll(struct blkfront_dev *dev)
{
    RING_IDX rp, cons;
    struct blkif_response *rsp;
    int more;
    int nr_consumed;

moretodo:
#ifdef HAVE_LIBC
    if (dev->fd != -1) {
        files[dev->fd].read = 0;
        mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */
    }
#endif

    rp = dev->ring.sring->rsp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */
    cons = dev->ring.rsp_cons;

    nr_consumed = 0;
    while (cons != rp)
    {
        struct blkfront_aiocb *aiocbp;
        int status;

        rsp = RING_GET_RESPONSE(&dev->ring, cons);
        nr_consumed++;

        aiocbp = (void*) (uintptr_t) rsp->id;
        status = rsp->status;

        if (status != BLKIF_RSP_OKAY)
            printk("block error %d for op %d\n", status, rsp->operation);

        switch (rsp->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        {
            int j;

            for (j = 0; j < aiocbp->n; j++)
                gnttab_end_access(aiocbp->gref[j]);

            break;
        }

        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
            break;

        default:
            printk("unrecognized block operation %d response\n", rsp->operation);
        }

        dev->ring.rsp_cons = ++cons;
        /* Note: the callback may free aiocbp itself */
        if (aiocbp && aiocbp->aio_cb)
            aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
        if (dev->ring.rsp_cons != cons)
            /* We reentered, we must not continue here */
            break;
    }

    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
    if (more) goto moretodo;

    return nr_consumed;
}
#ifdef HAVE_LIBC
int blkfront_open(struct blkfront_dev *dev)
{
    dev->fd = alloc_fd(FTYPE_BLK);
    printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd);
    files[dev->fd].blk.dev = dev;
    return dev->fd;
}
#endif
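
/*
 * Illustrative sketch (not part of the driver): end-to-end use of the
 * interface defined in this file from a Mini-OS application.
 * example_disk_test() is a hypothetical caller; it assumes the same headers
 * as blkfront.c and the default "device/vbd/768" xenstore node selected when
 * init_blkfront() is passed NULL.
 */
#if 0
static void example_disk_test(void)
{
    struct blkfront_info info;
    struct blkfront_dev *dev;
    struct blkfront_aiocb aiocb;

    dev = init_blkfront(NULL, &info);      /* NULL selects "device/vbd/768" */
    if (!dev)
        return;

    memset(&aiocb, 0, sizeof(aiocb));
    aiocb.aio_dev = dev;
    aiocb.aio_buf = (void*) alloc_page();  /* page-aligned buffer */
    aiocb.aio_nbytes = info.sector_size;   /* read one sector */
    aiocb.aio_offset = 0;                  /* first sector of the disk */
    blkfront_io(&aiocb, 0);                /* synchronous read */

    if (info.mode == O_RDWR) {
        aiocb.aio_cb = NULL;               /* blkfront_io() leaves aio_cb set; clear before reuse */
        blkfront_io(&aiocb, 1);            /* write the same sector back */
        blkfront_sync(dev);                /* barrier/flush if supported, then drain the ring */
    }

    free_page(aiocb.aio_buf);
    shutdown_blkfront(dev);
}
#endif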