ia64/xen-unstable

view tools/blktap/blktaplib.c @ 7238:971e7c7411b3

Raise an exception if an error appears on the pipes to our children, and make
sure that the child's pipes are closed even under that exception. Move the
handling of POLLHUP to the end of the loop, so that we guarantee to read any
remaining data from the child if POLLHUP and POLLIN appear at the same time.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@ewan
date Thu Oct 06 10:13:11 2005 +0100 (2005-10-06)
parents 06d84bf87159
children ff95b53bd39a
line source
1 /*
2 * blktaplib.c
3 *
4 * userspace interface routines for the blktap driver.
5 *
6 * (threadsafe(r) version)
7 *
8 * (c) 2004 Andrew Warfield.
9 */
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <sys/mman.h>
14 #include <sys/user.h>
15 #include <err.h>
16 #include <errno.h>
17 #include <sys/types.h>
18 #include <linux/types.h>
19 #include <sys/stat.h>
20 #include <fcntl.h>
21 #include <signal.h>
22 #include <sys/poll.h>
23 #include <sys/ioctl.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <pthread.h>
27 #include <xs.h>
/* Expose library-internal declarations from blktaplib.h. */
#define __COMPILING_BLKTAP_LIB
#include "blktaplib.h"

/* Debug printf: compiled out by default (flip the #if to enable). */
#if 0
#define DPRINTF(_f, _a...) printf ( _f , ## _a )
#else
#define DPRINTF(_f, _a...) ((void)0)
#endif
/* When nonzero, ask the driver to print ring indices on poll timeout. */
#define DEBUG_RING_IDXS 0

/* NOTE(review): POLLRDNORM is normally provided by <sys/poll.h>; an
 * unconditional redefinition risks a redefinition warning or a value
 * mismatch with the system header — confirm this is intentional. */
#define POLLRDNORM 0x040

#define BLKTAP_IOCTL_KICK 1
/* Signal handlers, defined at the bottom of this file. */
void got_sig_bus();
void got_sig_int();

/* in kernel these are opposite, but we are a consumer now. */
blkif_back_ring_t fe_ring; /* slightly counterintuitive ;) */
blkif_front_ring_t be_ring; /* NOTE(review): appears unused — the backend
                             * passthrough path is commented out below */

unsigned long mmap_vstart = 0; /* start of the mapped data pages */
char *blktap_mem;              /* base of the /dev/blktap mmap region */
int fd = 0;                    /* /dev/blktap file descriptor */

#define BLKTAP_RING_PAGES 1 /* Front */
#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
/* Crude failure limiter: count "bad" events and bail out of the process
 * once more than 50 have been seen. */
int bad_count = 0;

void bad(void)
{
    if (++bad_count > 50)
        exit(0);
}
64 /*-----[ ID Manipulation from tap driver code ]--------------------------*/
66 #define ACTIVE_RING_IDX unsigned short
68 inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
69 {
70 return ( (fe_dom << 16) | idx );
71 }
/*
 * Extract the ring-index half of a packed request id (low 16 bits).
 * See MAKE_ID for the packing scheme.
 *
 * Made "static inline": plain "inline" in a .c file relies on gnu89
 * semantics; under C99/C11 it leaves no external definition in this
 * translation unit, which can fail to link at -O0.
 */
static inline unsigned int ID_TO_IDX(unsigned long id)
{
    return ( id & 0x0000ffff );
}
78 inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
/* Global single-function hooks.
 * NOTE(review): neither pointer appears to be read or assigned anywhere
 * in this file — the per-blkif hook chains are what is actually used;
 * these may be vestigial. */
static int (*request_hook)(blkif_request_t *req) = NULL;
static int (*response_hook)(blkif_response_t *req) = NULL;
83 /*-----[ Data to/from Backend (server) VM ]------------------------------*/
85 /*
87 inline int write_req_to_be_ring(blkif_request_t *req)
88 {
89 blkif_request_t *req_d;
90 static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
92 pthread_mutex_lock(&be_prod_mutex);
93 req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
94 memcpy(req_d, req, sizeof(blkif_request_t));
95 wmb();
96 be_ring.req_prod_pvt++;
97 pthread_mutex_unlock(&be_prod_mutex);
99 return 0;
100 }
101 */
103 inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
104 {
105 blkif_response_t *rsp_d;
106 static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
108 pthread_mutex_lock(&fe_prod_mutex);
109 rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
110 memcpy(rsp_d, rsp, sizeof(blkif_response_t));
111 wmb();
112 fe_ring.rsp_prod_pvt++;
113 pthread_mutex_unlock(&fe_prod_mutex);
115 return 0;
116 }
118 static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp)
119 {
120 response_hook_t *rsp_hook;
122 rsp_hook = blkif->response_hook_chain;
123 while (rsp_hook != NULL)
124 {
125 switch(rsp_hook->func(blkif, rsp, 1))
126 {
127 case BLKTAP_PASS:
128 break;
129 default:
130 printf("Only PASS is supported for resp hooks!\n");
131 }
132 rsp_hook = rsp_hook->next;
133 }
134 }
/* Serialises ring pushes and kick ioctls between threads
 * (shared with blktap_kick_responses and the main listen loop). */
static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Run blkif's response hooks on rsp, then queue it on the frontend ring.
 * Does not push the ring or kick the driver — the caller must follow up
 * with blktap_kick_responses(). */
void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp)
{
    apply_rsp_hooks(blkif, rsp);
    write_rsp_to_fe_ring(rsp);
}
/* Publish all queued responses to the frontend ring and kick the blktap
 * driver so the frontend is notified.  Safe to call from any thread. */
void blktap_kick_responses(void)
{
    pthread_mutex_lock(&push_mutex);

    RING_PUSH_RESPONSES(&fe_ring);
    ioctl(fd, BLKTAP_IOCTL_KICK_FE);

    pthread_mutex_unlock(&push_mutex);
}
/*-----[ Polling fd listeners ]------------------------------------------*/

#define MAX_POLLFDS 64

/* A registered poll listener: func(fd) is called from the listen loop
 * whenever the watched fd reports any revents. */
typedef struct {
    int (*func)(int fd);  /* callback invoked on poll activity */
    struct pollfd *pfd;   /* slot in pfd[]; rebuilt each loop iteration */
    int fd;               /* file descriptor being watched */
    short events;         /* poll event mask requested by the caller */
    int active;           /* nonzero while this hook is registered */
} pollhook_t;

static struct pollfd pfd[MAX_POLLFDS+2]; /* tap and store are extra */
static pollhook_t pollhooks[MAX_POLLFDS];
/* Ring buffer of free pollhook slot indices: ph_cons consumes on attach,
 * ph_prod returns slots on detach. */
static unsigned int ph_freelist[MAX_POLLFDS];
static unsigned int ph_cons, ph_prod;
#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
/* NOTE(review): x is unparenthesised — PH_IDX(a + b) would misexpand. */
#define PH_IDX(x) (x % MAX_POLLFDS)
176 int blktap_attach_poll(int fd, short events, int (*func)(int fd))
177 {
178 pollhook_t *ph;
180 if (nr_pollhooks() == MAX_POLLFDS) {
181 printf("Too many pollhooks!\n");
182 return -1;
183 }
185 ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];
187 ph->func = func;
188 ph->fd = fd;
189 ph->events = events;
190 ph->active = 1;
192 DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1,
193 nr_pollhooks());
195 return 0;
196 }
198 void blktap_detach_poll(int fd)
199 {
200 int i;
202 for (i=0; i<MAX_POLLFDS; i++)
203 if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) {
204 ph_freelist[PH_IDX(ph_prod++)] = i;
205 pollhooks[i].pfd->fd = -1;
206 pollhooks[i].active = 0;
207 break;
208 }
210 DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i,
211 nr_pollhooks());
212 }
214 void pollhook_init(void)
215 {
216 int i;
218 for (i=0; i < MAX_POLLFDS; i++) {
219 ph_freelist[i] = (i+1) % MAX_POLLFDS;
220 pollhooks[i].active = 0;
221 }
223 ph_cons = 0;
224 ph_prod = MAX_POLLFDS;
225 }
/* Library constructor: runs before main() in any program linking this
 * library, so the pollhook table is always initialised. */
void __attribute__ ((constructor)) blktaplib_init(void)
{
    pollhook_init();
}
232 /*-----[ The main listen loop ]------------------------------------------*/
234 int blktap_listen(void)
235 {
236 int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret;
237 struct xs_handle *h;
238 blkif_t *blkif;
240 /* comms rings: */
241 blkif_request_t *req;
242 blkif_response_t *rsp;
243 blkif_sring_t *sring;
244 RING_IDX rp, i, pfd_count;
246 /* pending rings */
247 blkif_request_t req_pending[BLKIF_RING_SIZE];
248 /* blkif_response_t rsp_pending[BLKIF_RING_SIZE] */;
250 /* handler hooks: */
251 request_hook_t *req_hook;
252 response_hook_t *rsp_hook;
254 signal (SIGBUS, got_sig_bus);
255 signal (SIGINT, got_sig_int);
257 __init_blkif();
259 fd = open("/dev/blktap", O_RDWR);
260 if (fd == -1)
261 err(-1, "open failed!");
263 blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE,
264 PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
266 if ((int)blktap_mem == -1)
267 err(-1, "mmap failed!");
269 /* assign the rings to the mapped memory */
270 /*
271 sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
272 FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
273 */
274 sring = (blkif_sring_t *)((unsigned long)blktap_mem);
275 BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
277 mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
280 /* Set up store connection and watch. */
281 h = xs_daemon_open();
282 if (h == NULL)
283 err(-1, "xs_daemon_open");
285 ret = add_blockdevice_probe_watch(h, "Domain-0");
286 if (ret != 0)
287 err(0, "adding device probewatch");
289 ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
291 while(1) {
292 int ret;
294 /* build the poll list */
295 pfd_count = 0;
296 for ( i=0; i < MAX_POLLFDS; i++ ) {
297 pollhook_t *ph = &pollhooks[i];
299 if (ph->active) {
300 pfd[pfd_count].fd = ph->fd;
301 pfd[pfd_count].events = ph->events;
302 ph->pfd = &pfd[pfd_count];
303 pfd_count++;
304 }
305 }
307 tap_pfd = pfd_count++;
308 pfd[tap_pfd].fd = fd;
309 pfd[tap_pfd].events = POLLIN;
311 store_pfd = pfd_count++;
312 pfd[store_pfd].fd = xs_fileno(h);
313 pfd[store_pfd].events = POLLIN;
315 if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) {
316 if (DEBUG_RING_IDXS)
317 ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
318 continue;
319 }
321 for (i=0; i < MAX_POLLFDS; i++) {
322 if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
323 pollhooks[i].func(pollhooks[i].pfd->fd);
324 }
326 if (pfd[store_pfd].revents) {
327 ret = xs_fire_next_watch(h);
328 }
330 if (pfd[tap_pfd].revents)
331 {
332 /* empty the fe_ring */
333 notify_fe = 0;
334 notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
335 rp = fe_ring.sring->req_prod;
336 rmb();
337 for (i = fe_ring.req_cons; i != rp; i++)
338 {
339 int done = 0;
341 req = RING_GET_REQUEST(&fe_ring, i);
342 memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
343 req = &req_pending[ID_TO_IDX(req->id)];
345 blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle);
347 if (blkif != NULL)
348 {
349 req_hook = blkif->request_hook_chain;
350 while (req_hook != NULL)
351 {
352 switch(req_hook->func(blkif, req, ((i+1) == rp)))
353 {
354 case BLKTAP_RESPOND:
355 apply_rsp_hooks(blkif, (blkif_response_t *)req);
356 write_rsp_to_fe_ring((blkif_response_t *)req);
357 notify_fe = 1;
358 done = 1;
359 break;
360 case BLKTAP_STOLEN:
361 done = 1;
362 break;
363 case BLKTAP_PASS:
364 break;
365 default:
366 printf("Unknown request hook return value!\n");
367 }
368 if (done) break;
369 req_hook = req_hook->next;
370 }
371 }
373 if (done == 0)
374 {
375 /* this was: */
376 /* write_req_to_be_ring(req); */
378 unsigned long id = req->id;
379 unsigned short operation = req->operation;
380 printf("Unterminated request!\n");
381 rsp = (blkif_response_t *)req;
382 rsp->id = id;
383 rsp->operation = operation;
384 rsp->status = BLKIF_RSP_ERROR;
385 write_rsp_to_fe_ring(rsp);
386 notify_fe = 1;
387 done = 1;
388 }
390 }
391 fe_ring.req_cons = i;
393 /* empty the be_ring */
394 /*
395 notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
396 rp = be_ring.sring->rsp_prod;
397 rmb();
398 for (i = be_ring.rsp_cons; i != rp; i++)
399 {
401 rsp = RING_GET_RESPONSE(&be_ring, i);
402 memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
403 rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
405 DPRINTF("copying a be request\n");
407 apply_rsp_hooks(rsp);
408 write_rsp_to_fe_ring(rsp);
409 }
410 be_ring.rsp_cons = i;
411 */
412 /* notify the domains */
413 /*
414 if (notify_be) {
415 DPRINTF("notifying be\n");
416 pthread_mutex_lock(&push_mutex);
417 RING_PUSH_REQUESTS(&be_ring);
418 ioctl(fd, BLKTAP_IOCTL_KICK_BE);
419 pthread_mutex_unlock(&push_mutex);
420 }
421 */
422 if (notify_fe) {
423 DPRINTF("notifying fe\n");
424 pthread_mutex_lock(&push_mutex);
425 RING_PUSH_RESPONSES(&fe_ring);
426 ioctl(fd, BLKTAP_IOCTL_KICK_FE);
427 pthread_mutex_unlock(&push_mutex);
428 }
429 }
430 }
433 munmap(blktap_mem, PAGE_SIZE);
435 mmap_failed:
436 close(fd);
438 open_failed:
439 return 0;
440 }
/*
 * SIGBUS handler: a mapped page disappeared underneath us.
 *
 * Uses write() and _exit() because printf()/exit() are not
 * async-signal-safe (CERT SIG30-C): stdio locks/buffers may be in an
 * inconsistent state at signal delivery time.
 */
void got_sig_bus() {
    static const char msg[] = "Attempted to access a page that isn't.\n";
    ssize_t n = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
    (void)n; /* nothing useful to do if the write fails — we are dying */
    _exit(-1);
}
/* SIGINT handler: return the blktap driver to passthrough mode so the
 * block device keeps working after we exit, then close and quit.
 * NOTE(review): ioctl()/close() are async-signal-safe but exit() is not
 * (it runs atexit handlers and flushes stdio); _exit() would be safer —
 * confirm nothing relies on the flush before changing. */
void got_sig_int() {
    DPRINTF("quitting -- returning to passthrough mode.\n");
    /* fd == 0 means the device was never opened (or already closed). */
    if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
    close(fd);
    fd = 0;
    exit(0);
}