ia64/xen-unstable
tools/blktap/blktaplib.c @ 6538:84ee014ebd41

Merge xen-vtx-unstable.hg
author    adsharma@los-vmm.sc.intel.com
date      Wed Aug 17 12:34:38 2005 -0800
parents   23979fb12c49 8004acaa6684
children  99914b54f7bf

/*
 * blktaplib.c
 *
 * userspace interface routines for the blktap driver.
 *
 * (threadsafe(r) version)
 *
 * (c) 2004 Andrew Warfield.
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/user.h>
#include <err.h>
#include <errno.h>
#include <sys/types.h>
#include <linux/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>

#define __COMPILING_BLKTAP_LIB
#include "blktaplib.h"

#if 0
#define DPRINTF(_f, _a...) printf ( _f , ## _a )
#else
#define DPRINTF(_f, _a...) ((void)0)
#endif
#define DEBUG_RING_IDXS 1

#ifndef POLLRDNORM
#define POLLRDNORM 0x040
#endif

#define BLKTAP_IOCTL_KICK 1

void got_sig_bus(int sig);
void got_sig_int(int sig);

/* In the kernel these are the other way around, but we are a consumer now. */
blkif_back_ring_t  fe_ring; /* slightly counterintuitive ;) */
blkif_front_ring_t be_ring;
ctrl_back_ring_t   ctrl_ring;

unsigned long mmap_vstart = 0;
char *blktap_mem;
int fd = 0;

#define BLKTAP_RING_PAGES 3 /* Ctrl, Back, Front */
/*#define BLKTAP_MMAP_PAGES ((11 + 1) * 64)*/
#define BLKTAP_MMAP_PAGES \
    ((BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) * BLKIF_RING_SIZE)
#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + BLKTAP_MMAP_PAGES)
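
/*
 * Rough sketch of the mapped region (see the ring setup in blktap_listen()
 * below): the first BLKTAP_RING_PAGES pages hold the shared rings and the
 * remaining BLKTAP_MMAP_PAGES pages hold request data, starting at
 * mmap_vstart.
 *
 *   page 0       : ctrl sring  -> ctrl_ring (back-ring view)
 *   page 1       : blkif sring -> be_ring   (front-ring view, towards backend)
 *   page 2       : blkif sring -> fe_ring   (back-ring view, towards frontend)
 *   pages 3 ...  : data pages for in-flight requests
 */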

int bad_count = 0;
void bad(void)
{
    bad_count++;
    if (bad_count > 50) exit(0);
}

/*-----[ ID Manipulation from tap driver code ]--------------------------*/

#define ACTIVE_RING_IDX unsigned short

inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
{
    return ( (fe_dom << 16) | idx );
}

inline unsigned int ID_TO_IDX(unsigned long id)
{
    return ( id & 0x0000ffff );
}

inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
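
/*
 * Sketch of the id packing used above: the request id carries the frontend
 * domain in the high 16 bits and the active-ring index in the low 16 bits,
 * so for example
 *
 *   unsigned long id = MAKE_ID(5, 3);    (frontend domain 5, ring index 3)
 *   ID_TO_DOM(id) == 5, ID_TO_IDX(id) == 3
 */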

/*
static int (*request_hook)(blkif_request_t *req) = NULL;
static int (*response_hook)(blkif_response_t *req) = NULL;
*/

/*-----[ Request/Response hook chains.]----------------------------------*/

#define HOOK_NAME_MAX 50

typedef struct ctrl_hook_st {
    char name[HOOK_NAME_MAX];
    int (*func)(control_msg_t *);
    struct ctrl_hook_st *next;
} ctrl_hook_t;

typedef struct request_hook_st {
    char name[HOOK_NAME_MAX];
    int (*func)(blkif_request_t *);
    struct request_hook_st *next;
} request_hook_t;

typedef struct response_hook_st {
    char name[HOOK_NAME_MAX];
    int (*func)(blkif_response_t *);
    struct response_hook_st *next;
} response_hook_t;

static ctrl_hook_t     *ctrl_hook_chain     = NULL;
static request_hook_t  *request_hook_chain  = NULL;
static response_hook_t *response_hook_chain = NULL;

void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *))
{
    ctrl_hook_t *ch_ent, **c;

    ch_ent = (ctrl_hook_t *)malloc(sizeof(ctrl_hook_t));
    if (!ch_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }

    ch_ent->func = ch;
    ch_ent->next = NULL;
    strncpy(ch_ent->name, name, HOOK_NAME_MAX);
    ch_ent->name[HOOK_NAME_MAX-1] = '\0';

    c = &ctrl_hook_chain;
    while (*c != NULL) {
        c = &(*c)->next;
    }
    *c = ch_ent;
}

void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *))
{
    request_hook_t *rh_ent, **c;

    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
    if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }

    rh_ent->func = rh;
    rh_ent->next = NULL;
    strncpy(rh_ent->name, name, HOOK_NAME_MAX);
    rh_ent->name[HOOK_NAME_MAX-1] = '\0';

    c = &request_hook_chain;
    while (*c != NULL) {
        c = &(*c)->next;
    }
    *c = rh_ent;
}

void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *))
{
    response_hook_t *rh_ent, **c;

    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
    if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }

    rh_ent->func = rh;
    rh_ent->next = NULL;
    strncpy(rh_ent->name, name, HOOK_NAME_MAX);
    rh_ent->name[HOOK_NAME_MAX-1] = '\0';

    c = &response_hook_chain;
    while (*c != NULL) {
        c = &(*c)->next;
    }
    *c = rh_ent;
}
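
/*
 * Illustrative usage sketch (the hook and function names below are
 * hypothetical): a tap plugin registers its hooks and then hands control to
 * blktap_listen(). A request hook returns BLKTAP_PASS to forward the request
 * to the backend, BLKTAP_RESPOND to answer it directly, or BLKTAP_STOLEN to
 * take ownership of it; response hooks currently only support BLKTAP_PASS.
 *
 *   int my_request_hook(blkif_request_t *req)   { return BLKTAP_PASS; }
 *   int my_response_hook(blkif_response_t *rsp) { return BLKTAP_PASS; }
 *
 *   int main(void)
 *   {
 *       blktap_register_request_hook("my_req", my_request_hook);
 *       blktap_register_response_hook("my_rsp", my_response_hook);
 *       return blktap_listen();
 *   }
 */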

void print_hooks(void)
{
    request_hook_t  *req_hook;
    response_hook_t *rsp_hook;
    ctrl_hook_t     *ctrl_hook;

    DPRINTF("Control Hooks:\n");
    ctrl_hook = ctrl_hook_chain;
    while (ctrl_hook != NULL)
    {
        DPRINTF(" [0x%p] %s\n", ctrl_hook->func, ctrl_hook->name);
        ctrl_hook = ctrl_hook->next;
    }

    DPRINTF("Request Hooks:\n");
    req_hook = request_hook_chain;
    while (req_hook != NULL)
    {
        DPRINTF(" [0x%p] %s\n", req_hook->func, req_hook->name);
        req_hook = req_hook->next;
    }

    DPRINTF("Response Hooks:\n");
    rsp_hook = response_hook_chain;
    while (rsp_hook != NULL)
    {
        DPRINTF(" [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
        rsp_hook = rsp_hook->next;
    }
}

/*-----[ Data to/from Backend (server) VM ]------------------------------*/

inline int write_req_to_be_ring(blkif_request_t *req)
{
    blkif_request_t *req_d;
    static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;

    pthread_mutex_lock(&be_prod_mutex);
    req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
    memcpy(req_d, req, sizeof(blkif_request_t));
    wmb();
    be_ring.req_prod_pvt++;
    pthread_mutex_unlock(&be_prod_mutex);

    return 0;
}

inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
{
    blkif_response_t *rsp_d;
    static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;

    pthread_mutex_lock(&fe_prod_mutex);
    rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
    memcpy(rsp_d, rsp, sizeof(blkif_response_t));
    wmb();
    fe_ring.rsp_prod_pvt++;
    pthread_mutex_unlock(&fe_prod_mutex);

    return 0;
}
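
/*
 * Both writers above follow the same producer pattern: copy the message into
 * the next private slot, issue a write barrier so the payload is visible
 * before the producer index moves, then bump the private producer count.
 * The shared producer index is only published later, under push_mutex, via
 * RING_PUSH_REQUESTS()/RING_PUSH_RESPONSES() followed by the kick ioctl.
 */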

static void apply_rsp_hooks(blkif_response_t *rsp)
{
    response_hook_t *rsp_hook;

    rsp_hook = response_hook_chain;
    while (rsp_hook != NULL)
    {
        switch(rsp_hook->func(rsp))
        {
        case BLKTAP_PASS:
            break;
        default:
            printf("Only PASS is supported for resp hooks!\n");
        }
        rsp_hook = rsp_hook->next;
    }
}

static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;

void blktap_inject_response(blkif_response_t *rsp)
{
    apply_rsp_hooks(rsp);

    write_rsp_to_fe_ring(rsp);

    pthread_mutex_lock(&push_mutex);
    RING_PUSH_RESPONSES(&fe_ring);
    ioctl(fd, BLKTAP_IOCTL_KICK_FE);
    pthread_mutex_unlock(&push_mutex);
}

/*-----[ Polling fd listeners ]------------------------------------------*/

#define MAX_POLLFDS 64

typedef struct {
    int (*func)(int fd);
    struct pollfd *pfd;
    int fd;
    short events;
    int active;
} pollhook_t;

static struct pollfd pfd[MAX_POLLFDS+1];
static pollhook_t pollhooks[MAX_POLLFDS];
static unsigned int ph_freelist[MAX_POLLFDS];
static unsigned int ph_cons, ph_prod;
#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
#define PH_IDX(x) ((x) % MAX_POLLFDS)
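
/*
 * The free slots in pollhooks[] are tracked with a small ring of indices:
 * ph_freelist holds free slot numbers, ph_cons is where the next free slot
 * is taken from (attach) and ph_prod is where released slots are returned
 * (detach), so nr_pollhooks() is the number of hooks currently in use.
 */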

int blktap_attach_poll(int fd, short events, int (*func)(int fd))
{
    pollhook_t *ph;

    if (nr_pollhooks() == MAX_POLLFDS) {
        printf("Too many pollhooks!\n");
        return -1;
    }

    ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];

    ph->func   = func;
    ph->fd     = fd;
    ph->events = events;
    ph->active = 1;

    DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1,
            nr_pollhooks());

    return 0;
}

void blktap_detach_poll(int fd)
{
    int i;

    for (i=0; i<MAX_POLLFDS; i++)
        if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) {
            ph_freelist[PH_IDX(ph_prod++)] = i;
            pollhooks[i].pfd->fd = -1;
            pollhooks[i].active = 0;
            break;
        }

    DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i,
            nr_pollhooks());
}

void pollhook_init(void)
{
    int i;

    for (i=0; i < MAX_POLLFDS; i++) {
        ph_freelist[i] = (i+1) % MAX_POLLFDS;
        pollhooks[i].active = 0;
    }

    ph_cons = 0;
    ph_prod = MAX_POLLFDS;
}

void __attribute__ ((constructor)) blktaplib_init(void)
{
    pollhook_init();
}

/*-----[ The main listen loop ]------------------------------------------*/
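
/*
 * One pass of the loop in blktap_listen(), roughly:
 *
 *   1. Build a poll set from the active pollhooks plus /dev/blktap itself.
 *   2. poll(); on timeout, optionally dump the ring indexes and retry.
 *   3. Dispatch any ready pollhook fds to their handlers.
 *   4. If the tap fd is ready, drain the ctrl ring through the ctrl hooks,
 *      drain the fe ring through the request hooks (forwarding to the be
 *      ring unless a hook responds or steals the request), and drain the
 *      be ring through the response hooks back onto the fe ring.
 *   5. Kick the backend and/or frontend if anything was queued for them.
 */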

int blktap_listen(void)
{
    int notify_be, notify_fe, tap_pfd;

    /* comms rings: */
    blkif_request_t  *req;
    blkif_response_t *rsp;
    control_msg_t    *msg;
    blkif_sring_t    *sring;
    ctrl_sring_t     *csring;
    RING_IDX rp, i, pfd_count;

    /* pending rings */
    blkif_request_t  req_pending[BLKIF_RING_SIZE];
    blkif_response_t rsp_pending[BLKIF_RING_SIZE];

    /* handler hooks: */
    request_hook_t  *req_hook;
    response_hook_t *rsp_hook;
    ctrl_hook_t     *ctrl_hook;

    signal(SIGBUS, got_sig_bus);
    signal(SIGINT, got_sig_int);

    print_hooks();

    fd = open("/dev/blktap", O_RDWR);
    if (fd == -1) {
        printf("open failed! (%d)\n", errno);
        goto open_failed;
    }

    blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE,
                      PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

    if (blktap_mem == MAP_FAILED) {
        printf("mmap failed! (%d)\n", errno);
        goto mmap_failed;
    }

    /* assign the rings to the mapped memory */
    csring = (ctrl_sring_t *)blktap_mem;
    BACK_RING_INIT(&ctrl_ring, csring, PAGE_SIZE);

    sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
    FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);

    sring = (blkif_sring_t *)((unsigned long)blktap_mem + (2 * PAGE_SIZE));
    BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);

    mmap_vstart = (unsigned long)blktap_mem + (BLKTAP_RING_PAGES << PAGE_SHIFT);

    ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE);

    while(1) {
        int ret;

        /* build the poll list */
        DPRINTF("Building poll list.\n");

        pfd_count = 0;
        for ( i=0; i < MAX_POLLFDS; i++ ) {
            pollhook_t *ph = &pollhooks[i];

            if (ph->active) {
                pfd[pfd_count].fd     = ph->fd;
                pfd[pfd_count].events = ph->events;
                ph->pfd = &pfd[pfd_count];
                pfd_count++;
            }
        }

        tap_pfd = pfd_count;
        pfd[tap_pfd].fd = fd;
        pfd[tap_pfd].events = POLLIN;

        DPRINTF("poll() %d fds.\n", pfd_count);

        if ( (ret = poll(pfd, pfd_count+1, 10000)) == 0 ) {
            if (DEBUG_RING_IDXS)
                ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
            continue;
        }

        DPRINTF("poll returned %d\n", ret);

        for (i=0; i < MAX_POLLFDS; i++) {
            if ( (pollhooks[i].active) && (pollhooks[i].pfd->revents) )
                pollhooks[i].func(pollhooks[i].pfd->fd);
        }

        if (pfd[tap_pfd].revents) {

            /* empty the control ring */
            rp = ctrl_ring.sring->req_prod;
            rmb();
            for (i = ctrl_ring.req_cons; i < rp; i++)
            {
                msg = RING_GET_REQUEST(&ctrl_ring, i);

                ctrl_hook = ctrl_hook_chain;
                while (ctrl_hook != NULL)
                {
                    DPRINTF("CTRL_HOOK: %s\n", ctrl_hook->name);
                    /* We currently don't respond to ctrl messages. */
                    ctrl_hook->func(msg);
                    ctrl_hook = ctrl_hook->next;
                }
            }
            /* Using this as a unidirectional ring. */
            ctrl_ring.req_cons = ctrl_ring.rsp_prod_pvt = i;
            pthread_mutex_lock(&push_mutex);
            RING_PUSH_RESPONSES(&ctrl_ring);
            pthread_mutex_unlock(&push_mutex);

            /* empty the fe_ring */
            notify_fe = 0;
            notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
            rp = fe_ring.sring->req_prod;
            rmb();
            for (i = fe_ring.req_cons; i != rp; i++)
            {
                int done = 0; /* stop forwarding this request */

                req = RING_GET_REQUEST(&fe_ring, i);
                memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
                req = &req_pending[ID_TO_IDX(req->id)];

                DPRINTF("copying an fe request\n");

                req_hook = request_hook_chain;
                while (req_hook != NULL)
                {
                    DPRINTF("REQ_HOOK: %s\n", req_hook->name);
                    switch(req_hook->func(req))
                    {
                    case BLKTAP_RESPOND:
                        apply_rsp_hooks((blkif_response_t *)req);
                        write_rsp_to_fe_ring((blkif_response_t *)req);
                        notify_fe = 1;
                        done = 1;
                        break;
                    case BLKTAP_STOLEN:
                        done = 1;
                        break;
                    case BLKTAP_PASS:
                        break;
                    default:
                        printf("Unknown request hook return value!\n");
                    }
                    if (done) break;
                    req_hook = req_hook->next;
                }

                if (done == 0) write_req_to_be_ring(req);
            }
            fe_ring.req_cons = i;

            /* empty the be_ring */
            notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
            rp = be_ring.sring->rsp_prod;
            rmb();
            for (i = be_ring.rsp_cons; i != rp; i++)
            {
                rsp = RING_GET_RESPONSE(&be_ring, i);
                memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
                rsp = &rsp_pending[ID_TO_IDX(rsp->id)];

                DPRINTF("copying a be response\n");

                apply_rsp_hooks(rsp);
                write_rsp_to_fe_ring(rsp);
            }
            be_ring.rsp_cons = i;

            /* notify the domains */

            if (notify_be) {
                DPRINTF("notifying be\n");
                pthread_mutex_lock(&push_mutex);
                RING_PUSH_REQUESTS(&be_ring);
                ioctl(fd, BLKTAP_IOCTL_KICK_BE);
                pthread_mutex_unlock(&push_mutex);
            }

            if (notify_fe) {
                DPRINTF("notifying fe\n");
                pthread_mutex_lock(&push_mutex);
                RING_PUSH_RESPONSES(&fe_ring);
                ioctl(fd, BLKTAP_IOCTL_KICK_FE);
                pthread_mutex_unlock(&push_mutex);
            }
        }
    }

    munmap(blktap_mem, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE);

 mmap_failed:
    close(fd);

 open_failed:
    return 0;
}

void got_sig_bus(int sig)
{
    printf("Attempted to access a page that isn't mapped.\n");
    exit(-1);
}

void got_sig_int(int sig)
{
    DPRINTF("quitting -- returning to passthrough mode.\n");
    if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH);
    close(fd);
    fd = 0;
    exit(0);
}