direct-io.hg

changeset 4259:2405bf94a500

bitkeeper revision 1.1236.39.1 (42403ed3ayhqDaCmozMDqaFfcmjpzQ)

Added asynchronous support to the blockstore.
author jrb44@swoop.cl.cam.ac.uk
date Tue Mar 22 15:50:43 2005 +0000 (2005-03-22)
parents e57dc11820ba
children 0529fe941ec4
files BitKeeper/etc/logging_ok tools/blktap/blockstore.c tools/blktap/blockstore.h
line diff
     1.1 --- a/BitKeeper/etc/logging_ok	Mon Mar 21 09:52:57 2005 +0000
     1.2 +++ b/BitKeeper/etc/logging_ok	Tue Mar 22 15:50:43 2005 +0000
     1.3 @@ -34,6 +34,7 @@ iap10@pb007.cl.cam.ac.uk
     1.4  iap10@striker.cl.cam.ac.uk
     1.5  iap10@tetris.cl.cam.ac.uk
     1.6  jrb44@plym.cl.cam.ac.uk
     1.7 +jrb44@swoop.cl.cam.ac.uk
     1.8  jws22@gauntlet.cl.cam.ac.uk
     1.9  jws@cairnwell.research
    1.10  kaf24@camelot.eng.3leafnetworks.com
     2.1 --- a/tools/blktap/blockstore.c	Mon Mar 21 09:52:57 2005 +0000
     2.2 +++ b/tools/blktap/blockstore.c	Tue Mar 22 15:50:43 2005 +0000
     2.3 @@ -13,31 +13,73 @@
     2.4  #include <string.h>
     2.5  #include <sys/types.h>
     2.6  #include <sys/stat.h>
     2.7 +#include <stdarg.h>
     2.8  #include "blockstore.h"
     2.9  
    2.10  #define BLOCKSTORE_REMOTE
    2.11 +//#define BSDEBUG
    2.12 +
    2.13 +/*****************************************************************************
    2.14 + * Debugging
    2.15 + */
    2.16 +#ifdef BSDEBUG
    2.17 +void DB(char *format, ...)
    2.18 +{
    2.19 +    va_list args;
    2.20 +    
    2.21 +    va_start(args, format);
    2.22 +    vfprintf(stderr, format, args);
    2.23 +    va_end(args);
    2.24 +}
    2.25 +#else
    2.26 +#define DB(format, ...) (void)0
    2.27 +#endif
    2.28  
    2.29  #ifdef BLOCKSTORE_REMOTE
    2.30  
    2.31 -//#define BSDEBUG
    2.32 -
    2.33  #include <sys/socket.h>
    2.34  #include <sys/ioctl.h>
    2.35  #include <netinet/in.h>
    2.36  #include <netdb.h>
    2.37  
    2.38 +/*****************************************************************************
    2.39 + *                                                                           *
    2.40 + *****************************************************************************/
    2.41 +
    2.42 +/*****************************************************************************
    2.43 + * Network state                                                             *
    2.44 + *****************************************************************************/
    2.45 +
    2.46 +/* The individual disk servers we talks to. These will be referenced by
    2.47 + * an integer index into bsservers[].
    2.48 + */
    2.49 +bsserver_t bsservers[MAX_SERVERS];
    2.50 +
    2.51 +/* The cluster map. This is indexed by an integer cluster number.
    2.52 + */
    2.53 +bscluster_t bsclusters[MAX_CLUSTERS];
    2.54 +
    2.55 +/* Local socket.
    2.56 + */
    2.57 +struct sockaddr_in sin_local;
    2.58 +int bssock = 0;
    2.59 +
    2.60 +/*****************************************************************************
    2.61 + * Message queue management                                                  *
    2.62 + *****************************************************************************/
    2.63 +
    2.64 +/* Protects the queue manipulation critcal regions.
    2.65 + */
    2.66  #define ENTER_QUEUE_CR (void)0
    2.67  #define LEAVE_QUEUE_CR (void)0
    2.68  
    2.69 -bsserver_t bsservers[MAX_SERVERS];
    2.70 -bscluster_t bsclusters[MAX_CLUSTERS];
    2.71 -
    2.72 -struct sockaddr_in sin_local;
    2.73 -int bssock = 0;
    2.74 -
    2.75 +/* A message queue entry. We allocate one of these for every request we send.
    2.76 + * Asynchronous reply reception also used one of these.
    2.77 + */
    2.78  typedef struct bsq_t_struct {
    2.79      struct bsq_t_struct *prev;
    2.80      struct bsq_t_struct *next;
    2.81 +    int status;
    2.82      int server;
    2.83      int length;
    2.84      struct msghdr msghdr;
    2.85 @@ -46,8 +88,134 @@ typedef struct bsq_t_struct {
    2.86      void *block;
    2.87  } bsq_t;
    2.88  
    2.89 +#define BSQ_STATUS_MATCHED 1
    2.90 +
    2.91 +#define ENTER_LUID_CR (void)0
    2.92 +#define LEAVE_LUID_CR (void)0
    2.93 +
    2.94 +static u64 luid_cnt = 0x1000ULL;
    2.95 +u64 new_luid(void) {
    2.96 +    u64 luid;
    2.97 +    ENTER_LUID_CR;
    2.98 +    luid = luid_cnt++;
    2.99 +    LEAVE_LUID_CR;
   2.100 +    return luid;
   2.101 +}
   2.102 +
   2.103 +/* Queue of outstanding requests.
   2.104 + */
   2.105  bsq_t *bs_head = NULL;
   2.106  bsq_t *bs_tail = NULL;
   2.107 +int bs_qlen = 0;
   2.108 +
   2.109 +/*
   2.110 + */
   2.111 +void queuedebug(char *msg) {
   2.112 +    bsq_t *q;
   2.113 +    ENTER_QUEUE_CR;
   2.114 +    fprintf(stderr, "Q: %s len=%u\n", msg, bs_qlen);
   2.115 +    for (q = bs_head; q; q = q->next) {
   2.116 +        fprintf(stderr, "  luid=%016llx server=%u\n",
   2.117 +                q->message.luid, q->server);
   2.118 +    }
   2.119 +    LEAVE_QUEUE_CR;
   2.120 +}
   2.121 +
   2.122 +int enqueue(bsq_t *qe) {
   2.123 +    ENTER_QUEUE_CR;
   2.124 +    qe->next = NULL;
   2.125 +    qe->prev = bs_tail;
   2.126 +    if (!bs_head)
   2.127 +        bs_head = qe;
   2.128 +    else
   2.129 +        bs_tail->next = qe;
   2.130 +    bs_tail = qe;
   2.131 +    bs_qlen++;
   2.132 +    LEAVE_QUEUE_CR;
   2.133 +#ifdef BSDEBUG
   2.134 +    queuedebug("enqueue");
   2.135 +#endif
   2.136 +    return 0;
   2.137 +}
   2.138 +
   2.139 +int dequeue(bsq_t *qe) {
   2.140 +    bsq_t *q;
   2.141 +    ENTER_QUEUE_CR;
   2.142 +    for (q = bs_head; q; q = q->next) {
   2.143 +        if (q == qe) {
   2.144 +            if (q->prev)
   2.145 +                q->prev->next = q->next;
   2.146 +            else 
   2.147 +                bs_head = q->next;
   2.148 +            if (q->next)
   2.149 +                q->next->prev = q->prev;
   2.150 +            else
   2.151 +                bs_tail = q->prev;
   2.152 +            bs_qlen--;
   2.153 +            goto found;
   2.154 +        }
   2.155 +    }
   2.156 +
   2.157 +    LEAVE_QUEUE_CR;
   2.158 +#ifdef BSDEBUG
   2.159 +    queuedebug("dequeue not found");
   2.160 +#endif
   2.161 +    return 0;
   2.162 +
   2.163 +    found:
   2.164 +    LEAVE_QUEUE_CR;
   2.165 +#ifdef BSDEBUG
   2.166 +    queuedebug("dequeue not found");
   2.167 +#endif
   2.168 +    return 1;
   2.169 +}
   2.170 +
   2.171 +bsq_t *queuesearch(bsq_t *qe) {
   2.172 +    bsq_t *q;
   2.173 +    ENTER_QUEUE_CR;
   2.174 +    for (q = bs_head; q; q = q->next) {
   2.175 +        if ((qe->server == q->server) &&
   2.176 +            (qe->message.operation == q->message.operation) &&
   2.177 +            (qe->message.luid == q->message.luid)) {
   2.178 +
   2.179 +            if ((q->message.operation == BSOP_READBLOCK) &&
   2.180 +                ((q->message.flags & BSOP_FLAG_ERROR) == 0)) {
   2.181 +                q->block = qe->block;
   2.182 +                qe->block = NULL;
   2.183 +            }
   2.184 +            q->length = qe->length;
   2.185 +            q->message.flags = qe->message.flags;
   2.186 +            q->message.id = qe->message.id;
   2.187 +            q->status |= BSQ_STATUS_MATCHED;
   2.188 +
   2.189 +            if (q->prev)
   2.190 +                q->prev->next = q->next;
   2.191 +            else 
   2.192 +                bs_head = q->next;
   2.193 +            if (q->next)
   2.194 +                q->next->prev = q->prev;
   2.195 +            else
   2.196 +                bs_tail = q->prev;
   2.197 +            q->next = NULL;
   2.198 +            q->prev = NULL;
   2.199 +            bs_qlen--;
   2.200 +            goto found;
   2.201 +        }
   2.202 +    }
   2.203 +
   2.204 +    LEAVE_QUEUE_CR;
   2.205 +#ifdef BSDEBUG
   2.206 +    queuedebug("queuesearch not found");
   2.207 +#endif
   2.208 +    return NULL;
   2.209 +
   2.210 +    found:
   2.211 +    LEAVE_QUEUE_CR;
   2.212 +#ifdef BSDEBUG
   2.213 +    queuedebug("queuesearch found");
   2.214 +#endif
   2.215 +    return q;
   2.216 +}
   2.217  
   2.218  int send_message(bsq_t *qe) {
   2.219      int rc;
   2.220 @@ -71,16 +239,21 @@ int send_message(bsq_t *qe) {
   2.221          qe->iov[1].iov_len = BLOCK_SIZE;
   2.222      }
   2.223  
   2.224 -    rc = sendmsg(bssock, &(qe->msghdr), 0);
   2.225 +    qe->message.luid = new_luid();
   2.226 +
   2.227 +    qe->status = 0;
   2.228 +    if (enqueue(qe) < 0) {
   2.229 +        fprintf(stderr, "Error enqueuing request.\n");
   2.230 +        return -1;
   2.231 +    }
   2.232 +
   2.233 +    DB("send_message to %d luid=%016llx\n", qe->server, qe->message.luid);
   2.234 +    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
   2.235      //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0,
   2.236      //           (struct sockaddr *)&(bsservers[qe->server].sin),
   2.237      //           sizeof(struct sockaddr_in));
   2.238      if (rc < 0)
   2.239          return rc;
   2.240 -    
   2.241 -    ENTER_QUEUE_CR;
   2.242 -    
   2.243 -    LEAVE_QUEUE_CR;
   2.244  
   2.245      return rc;
   2.246  }
   2.247 @@ -115,22 +288,148 @@ int recv_message(bsq_t *qe) {
   2.248      return rc;
   2.249  }
   2.250  
   2.251 +int get_server_number(struct sockaddr_in *sin) {
   2.252 +    int i;
   2.253 +
   2.254 +#ifdef BSDEBUG2
   2.255 +    fprintf(stderr,
   2.256 +            "get_server_number(%u.%u.%u.%u/%u)\n",
   2.257 +            (unsigned int)sin->sin_addr.s_addr & 0xff,
   2.258 +            ((unsigned int)sin->sin_addr.s_addr >> 8) & 0xff,
   2.259 +            ((unsigned int)sin->sin_addr.s_addr >> 16) & 0xff,
   2.260 +            ((unsigned int)sin->sin_addr.s_addr >> 24) & 0xff,
   2.261 +            (unsigned int)sin->sin_port);
   2.262 +#endif
   2.263 +
   2.264 +    for (i = 0; i < MAX_SERVERS; i++) {
   2.265 +        if (bsservers[i].hostname) {
   2.266 +#ifdef BSDEBUG2
   2.267 +            fprintf(stderr,
   2.268 +                    "get_server_number check %u.%u.%u.%u/%u\n",
   2.269 +                    (unsigned int)bsservers[i].sin.sin_addr.s_addr&0xff,
   2.270 +                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 8)&0xff,
   2.271 +                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 16)&0xff,
   2.272 +                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 24)&0xff,
   2.273 +                    (unsigned int)bsservers[i].sin.sin_port);
   2.274 +#endif
   2.275 +            if ((sin->sin_family == bsservers[i].sin.sin_family) &&
   2.276 +                (sin->sin_port == bsservers[i].sin.sin_port) &&
   2.277 +                (memcmp((void *)&(sin->sin_addr),
   2.278 +                        (void *)&(bsservers[i].sin.sin_addr),
   2.279 +                        sizeof(struct in_addr)) == 0)) {
   2.280 +                return i;
   2.281 +            }
   2.282 +        }        
   2.283 +    }
   2.284 +
   2.285 +    return -1;
   2.286 +}
   2.287 +
   2.288 +void *rx_buffer = NULL;
   2.289 +bsq_t rx_qe;
   2.290 +bsq_t *recv_any(void) {
   2.291 +    struct sockaddr_in from;
   2.292 +    int rc;
   2.293 +
   2.294 +    DB("ENTER recv_any\n");
   2.295 +
   2.296 +    rx_qe.msghdr.msg_name = &from;
   2.297 +    rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in);
   2.298 +    rx_qe.msghdr.msg_iov = rx_qe.iov;
   2.299 +    if (!rx_buffer) {
   2.300 +        rx_buffer = malloc(BLOCK_SIZE);
   2.301 +        if (!rx_buffer) {
   2.302 +            perror("recv_any malloc");
   2.303 +            return NULL;
   2.304 +        }
   2.305 +    }
   2.306 +    rx_qe.block = rx_buffer;
   2.307 +    rx_buffer = NULL;
   2.308 +    rx_qe.msghdr.msg_iovlen = 2;
   2.309 +    rx_qe.msghdr.msg_control = NULL;
   2.310 +    rx_qe.msghdr.msg_controllen = 0;
   2.311 +    rx_qe.msghdr.msg_flags = 0;
   2.312 +    
   2.313 +    rx_qe.iov[0].iov_base = (void *)&(rx_qe.message);
   2.314 +    rx_qe.iov[0].iov_len = MSGBUFSIZE_ID;
   2.315 +    rx_qe.iov[1].iov_base = rx_qe.block;
   2.316 +    rx_qe.iov[1].iov_len = BLOCK_SIZE;
   2.317 +
   2.318 +    rc = recvmsg(bssock, &(rx_qe.msghdr), 0);
   2.319 +    if (rc < 0) {
   2.320 +        perror("recv_any");
   2.321 +        return NULL;
   2.322 +    }
   2.323 +    rx_qe.length = rc;    
   2.324 +    rx_qe.server = get_server_number(&from);
   2.325 +
   2.326 +    DB("recv_any from %d luid=%016llx len=%u\n",
   2.327 +       rx_qe.server, rx_qe.message.luid, rx_qe.length);
   2.328 +
   2.329 +    return &rx_qe;
   2.330 +}
   2.331 +
   2.332 +void recv_recycle_buffer(bsq_t *q) {
   2.333 +    if (q->block) {
   2.334 +        rx_buffer = q->block;
   2.335 +        q->block = NULL;
   2.336 +    }
   2.337 +}
   2.338 +
   2.339 +// cycle through reading any incoming, searching for a match in the
   2.340 +// queue, until we have all we need.
   2.341 +int wait_recv(bsq_t **reqs, int numreqs) {
   2.342 +    bsq_t *q, *m;
   2.343 +    unsigned int x, i;
   2.344 +
   2.345 +    DB("ENTER wait_recv %u\n", numreqs);
   2.346 +
   2.347 +    checkmatch:
   2.348 +    x = 0xffffffff;
   2.349 +    for (i = 0; i < numreqs; i++) {
   2.350 +        x &= reqs[i]->status;
   2.351 +    }
   2.352 +    if ((x & BSQ_STATUS_MATCHED)) {
   2.353 +        DB("LEAVE wait_recv\n");
   2.354 +        return numreqs;
   2.355 +    }
   2.356 +
   2.357 +    rxagain:
   2.358 +    q = recv_any();
   2.359 +    if (!q)
   2.360 +        return -1;
   2.361 +
   2.362 +    m = queuesearch(q);
   2.363 +    recv_recycle_buffer(q);
   2.364 +    if (!m) {
   2.365 +        fprintf(stderr, "Unmatched RX\n");
   2.366 +        goto rxagain;
   2.367 +    }
   2.368 +
   2.369 +    goto checkmatch;
   2.370 +
   2.371 +}
   2.372 +
   2.373  void *readblock_indiv(int server, u64 id) {
   2.374      void *block;
   2.375      bsq_t *qe;
   2.376 -    int len;
   2.377 +    int len, rc;
   2.378  
   2.379      qe = (bsq_t *)malloc(sizeof(bsq_t));
   2.380      if (!qe) {
   2.381          perror("readblock qe malloc");
   2.382          return NULL;
   2.383      }
   2.384 +    qe->block = NULL;
   2.385 +    
   2.386 +    /*
   2.387      qe->block = malloc(BLOCK_SIZE);
   2.388      if (!qe->block) {
   2.389          perror("readblock qe malloc");
   2.390          free((void *)qe);
   2.391          return NULL;
   2.392      }
   2.393 +    */
   2.394  
   2.395      qe->server = server;
   2.396  
   2.397 @@ -144,31 +443,40 @@ void *readblock_indiv(int server, u64 id
   2.398          goto err;
   2.399      }
   2.400      
   2.401 -    len = recv_message(qe);
   2.402 +    /*len = recv_message(qe);
   2.403      if (len < 0) {
   2.404          perror("readblock recv");
   2.405          goto err;
   2.406 +    }*/
   2.407 +
   2.408 +    rc = wait_recv(&qe, 1);
   2.409 +    if (rc < 0) {
   2.410 +        perror("readblock recv");
   2.411 +        goto err;
   2.412      }
   2.413 +
   2.414      if ((qe->message.flags & BSOP_FLAG_ERROR)) {
   2.415          fprintf(stderr, "readblock server error\n");
   2.416          goto err;
   2.417      }
   2.418 -    if (len < MSGBUFSIZE_BLOCK) {
   2.419 +    if (qe->length < MSGBUFSIZE_BLOCK) {
   2.420          fprintf(stderr, "readblock recv short (%u)\n", len);
   2.421          goto err;
   2.422      }
   2.423 -    if ((block = malloc(BLOCK_SIZE)) == NULL) {
   2.424 +    /* if ((block = malloc(BLOCK_SIZE)) == NULL) {
   2.425          perror("readblock malloc");
   2.426          goto err;
   2.427      }
   2.428 -    //memcpy(block, qe->message.block, BLOCK_SIZE);
   2.429 +    memcpy(block, qe->message.block, BLOCK_SIZE);
   2.430 +    */    
   2.431      block = qe->block;
   2.432  
   2.433      free((void *)qe);
   2.434      return block;
   2.435  
   2.436      err:
   2.437 -    free(qe->block);
   2.438 +    if (qe->block)
   2.439 +        free(qe->block);
   2.440      free((void *)qe);
   2.441      return NULL;
   2.442  }
   2.443 @@ -229,7 +537,8 @@ void *readblock(u64 id) {
   2.444      return block;
   2.445  }
   2.446  
   2.447 -int writeblock_indiv(int server, u64 id, void *block) {
   2.448 +bsq_t *writeblock_indiv(int server, u64 id, void *block) {
   2.449 +
   2.450      bsq_t *qe;
   2.451      int len;
   2.452  
   2.453 @@ -251,28 +560,14 @@ int writeblock_indiv(int server, u64 id,
   2.454          perror("writeblock sendto");
   2.455          goto err;
   2.456      }
   2.457 -    
   2.458 -    len = recv_message(qe);
   2.459 -    if (len < 0) {
   2.460 -        perror("writeblock recv");
   2.461 -        goto err;
   2.462 -    }
   2.463 -    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
   2.464 -        fprintf(stderr, "writeblock server error\n");
   2.465 -        goto err;
   2.466 -    }
   2.467 -    if (len < MSGBUFSIZE_ID) {
   2.468 -        fprintf(stderr, "writeblock recv short (%u)\n", len);
   2.469 -        goto err;
   2.470 -    }
   2.471  
   2.472 -    free((void *)qe);
   2.473 -    return 0;
   2.474 +    return qe;
   2.475  
   2.476      err:
   2.477      free((void *)qe);
   2.478 -    return -1;
   2.479 +    return NULL;
   2.480  }
   2.481 +    
   2.482  
   2.483  /**
   2.484   * writeblock: write an existing block to disk
   2.485 @@ -282,11 +577,15 @@ int writeblock_indiv(int server, u64 id,
   2.486   *   @return: zero on success, -1 on failure
   2.487   */
   2.488  int writeblock(u64 id, void *block) {
   2.489 +    
   2.490      int map = (int)BSID_MAP(id);
   2.491 -    
   2.492      int rep0 = bsclusters[map].servers[0];
   2.493      int rep1 = bsclusters[map].servers[1];
   2.494      int rep2 = bsclusters[map].servers[2];
   2.495 +    bsq_t *reqs[3];
   2.496 +    int rc;
   2.497 +
   2.498 +    reqs[0] = reqs[1] = reqs[2] = NULL;
   2.499  
   2.500  #ifdef BSDEBUG
   2.501      fprintf(stderr,
   2.502 @@ -302,20 +601,65 @@ int writeblock(u64 id, void *block) {
   2.503              (unsigned int)((unsigned char *)block)[7]);
   2.504  #endif
   2.505  
   2.506 -/* special case for the "superblock" just use the first block on the
   2.507 +    /* special case for the "superblock" just use the first block on the
   2.508       * first replica. (extend to blocks < 6 for vdi bug)
   2.509       */
   2.510      if (id < 6) {
   2.511 -        return writeblock_indiv(rep0, id, block);
   2.512 +        reqs[0] = writeblock_indiv(rep0, id, block);
   2.513 +        if (!reqs[0])
   2.514 +            return -1;
   2.515 +        rc = wait_recv(reqs, 1);
   2.516 +        return rc;
   2.517      }
   2.518  
   2.519 -    if (writeblock_indiv(rep0, BSID_REPLICA0(id), block) < 0)
   2.520 -        return -1;
   2.521 -    if (writeblock_indiv(rep1, BSID_REPLICA1(id), block) < 0)
   2.522 -        return -1;
   2.523 -    if (writeblock_indiv(rep2, BSID_REPLICA2(id), block) < 0)
   2.524 -        return -1;
   2.525 +    reqs[0] = writeblock_indiv(rep0, BSID_REPLICA0(id), block);
   2.526 +    if (!reqs[0])
   2.527 +        goto err;
   2.528 +    reqs[1] = writeblock_indiv(rep1, BSID_REPLICA1(id), block);
   2.529 +    if (!reqs[1])
   2.530 +        goto err;
   2.531 +    reqs[2] = writeblock_indiv(rep2, BSID_REPLICA2(id), block);
   2.532 +    if (!reqs[2])
   2.533 +        goto err;
   2.534 +
   2.535 +    rc = wait_recv(reqs, 3);
   2.536 +    if (rc < 0) {
   2.537 +        perror("writeblock recv");
   2.538 +        goto err;
   2.539 +    }
   2.540 +    if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
   2.541 +        fprintf(stderr, "writeblock server0 error\n");
   2.542 +        goto err;
   2.543 +    }
   2.544 +    if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
   2.545 +        fprintf(stderr, "writeblock server1 error\n");
   2.546 +        goto err;
   2.547 +    }
   2.548 +    if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
   2.549 +        fprintf(stderr, "writeblock server2 error\n");
   2.550 +        goto err;
   2.551 +    }
   2.552 +
   2.553 +
   2.554 +    free((void *)reqs[0]);
   2.555 +    free((void *)reqs[1]);
   2.556 +    free((void *)reqs[2]);
   2.557      return 0;
   2.558 +
   2.559 +    err:
   2.560 +    if (reqs[0]) {
   2.561 +        dequeue(reqs[0]);
   2.562 +        free((void *)reqs[0]);
   2.563 +    }
   2.564 +    if (reqs[1]) {
   2.565 +        dequeue(reqs[1]);
   2.566 +        free((void *)reqs[1]);
   2.567 +    }
   2.568 +    if (reqs[2]) {
   2.569 +        dequeue(reqs[2]);
   2.570 +        free((void *)reqs[2]);
   2.571 +    }
   2.572 +    return -1;
   2.573  }
   2.574  
   2.575  /**
   2.576 @@ -328,7 +672,7 @@ u64 allocblock(void *block) {
   2.577      return allocblock_hint(block, 0);
   2.578  }
   2.579  
   2.580 -u64 allocblock_hint_indiv(int server, void *block, u64 hint) {
   2.581 +bsq_t *allocblock_hint_indiv(int server, void *block, u64 hint) {
   2.582      bsq_t *qe;
   2.583      int len;
   2.584  
   2.585 @@ -351,26 +695,11 @@ u64 allocblock_hint_indiv(int server, vo
   2.586          goto err;
   2.587      }
   2.588      
   2.589 -    len = recv_message(qe);
   2.590 -    if (len < 0) {
   2.591 -        perror("allocblock_hint recv");
   2.592 -        goto err;
   2.593 -    }
   2.594 -    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
   2.595 -        fprintf(stderr, "allocblock_hint server error\n");
   2.596 -        goto err;
   2.597 -    }
   2.598 -    if (len < MSGBUFSIZE_ID) {
   2.599 -        fprintf(stderr, "allocblock_hint recv short (%u)\n", len);
   2.600 -        goto err;
   2.601 -    }
   2.602 -
   2.603 -    free((void *)qe);
   2.604 -    return qe->message.id;
   2.605 +    return qe;
   2.606  
   2.607      err:
   2.608      free((void *)qe);
   2.609 -    return 0;
   2.610 +    return NULL;
   2.611  }
   2.612  
   2.613  /**
   2.614 @@ -382,22 +711,48 @@ u64 allocblock_hint_indiv(int server, vo
   2.615   */
   2.616  u64 allocblock_hint(void *block, u64 hint) {
   2.617      int map = (int)hint;
   2.618 -    
   2.619      int rep0 = bsclusters[map].servers[0];
   2.620      int rep1 = bsclusters[map].servers[1];
   2.621      int rep2 = bsclusters[map].servers[2];
   2.622 -
   2.623 +    bsq_t *reqs[3];
   2.624 +    int rc;
   2.625      u64 id0, id1, id2;
   2.626  
   2.627 -    id0 = allocblock_hint_indiv(rep0, block, 0);
   2.628 -    if (id0 == 0)
   2.629 -        return 0;
   2.630 -    id1 = allocblock_hint_indiv(rep1, block, 0);
   2.631 -    if (id1 == 0)
   2.632 -        return 0;
   2.633 -    id2 = allocblock_hint_indiv(rep2, block, 0);
   2.634 -    if (id2 == 0)
   2.635 -        return 0;
   2.636 +    reqs[0] = reqs[1] = reqs[2] = NULL;
   2.637 +
   2.638 +    DB("ENTER allocblock\n");
   2.639 +
   2.640 +    reqs[0] = allocblock_hint_indiv(rep0, block, hint);
   2.641 +    if (!reqs[0])
   2.642 +        goto err;
   2.643 +    reqs[1] = allocblock_hint_indiv(rep1, block, hint);
   2.644 +    if (!reqs[1])
   2.645 +        goto err;
   2.646 +    reqs[2] = allocblock_hint_indiv(rep2, block, hint);
   2.647 +    if (!reqs[2])
   2.648 +        goto err;
   2.649 +
   2.650 +    rc = wait_recv(reqs, 3);
   2.651 +    if (rc < 0) {
   2.652 +        perror("allocblock recv");
   2.653 +        goto err;
   2.654 +    }
   2.655 +    if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
   2.656 +        fprintf(stderr, "allocblock server0 error\n");
   2.657 +        goto err;
   2.658 +    }
   2.659 +    if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
   2.660 +        fprintf(stderr, "allocblock server1 error\n");
   2.661 +        goto err;
   2.662 +    }
   2.663 +    if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
   2.664 +        fprintf(stderr, "allocblock server2 error\n");
   2.665 +        goto err;
   2.666 +    }
   2.667 +
   2.668 +    id0 = reqs[0]->message.id;
   2.669 +    id1 = reqs[1]->message.id;
   2.670 +    id2 = reqs[2]->message.id;
   2.671  
   2.672  #ifdef BSDEBUG
   2.673      fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
   2.674 @@ -411,8 +766,26 @@ u64 allocblock_hint(void *block, u64 hin
   2.675              (unsigned int)((unsigned char *)block)[6],
   2.676              (unsigned int)((unsigned char *)block)[7]);
   2.677  #endif
   2.678 +    
   2.679 +    free((void *)reqs[0]);
   2.680 +    free((void *)reqs[1]);
   2.681 +    free((void *)reqs[2]);
   2.682 +    return BSID(map, id0, id1, id2);
   2.683  
   2.684 -    return BSID(map, id0, id1, id2);
   2.685 +    err:
   2.686 +    if (reqs[0]) {
   2.687 +        dequeue(reqs[0]);
   2.688 +        free((void *)reqs[0]);
   2.689 +    }
   2.690 +    if (reqs[1]) {
   2.691 +        dequeue(reqs[1]);
   2.692 +        free((void *)reqs[1]);
   2.693 +    }
   2.694 +    if (reqs[2]) {
   2.695 +        dequeue(reqs[2]);
   2.696 +        free((void *)reqs[2]);
   2.697 +    }
   2.698 +    return 0;
   2.699  }
   2.700  
   2.701  #else /* /BLOCKSTORE_REMOTE */
   2.702 @@ -543,13 +916,13 @@ int __init_blockstore(void)
   2.703      int i;
   2.704  
   2.705      bsservers[0].hostname = "firebug.cl.cam.ac.uk";
   2.706 -    bsservers[1].hostname = "tetris.cl.cam.ac.uk";
   2.707 -    bsservers[2].hostname = "donkeykong.cl.cam.ac.uk";
   2.708 -    bsservers[3].hostname = "gunfighter.cl.cam.ac.uk";
   2.709 -    bsservers[4].hostname = "galaxian.cl.cam.ac.uk";
   2.710 -    bsservers[5].hostname = "firetrack.cl.cam.ac.uk";
   2.711 -    bsservers[6].hostname = "funfair.cl.cam.ac.uk";
   2.712 -    bsservers[7].hostname = "felix.cl.cam.ac.uk";
   2.713 +    bsservers[1].hostname = "planb.cl.cam.ac.uk";
   2.714 +    bsservers[2].hostname = "simcity.cl.cam.ac.uk";
   2.715 +    bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/;
   2.716 +    bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/;
   2.717 +    bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/;
   2.718 +    bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/;
   2.719 +    bsservers[7].hostname = NULL/*"felix.cl.cam.ac.uk"*/;
   2.720      bsservers[8].hostname = NULL;
   2.721      bsservers[9].hostname = NULL;
   2.722      bsservers[10].hostname = NULL;
     3.1 --- a/tools/blktap/blockstore.h	Mon Mar 21 09:52:57 2005 +0000
     3.2 +++ b/tools/blktap/blockstore.h	Tue Mar 22 15:50:43 2005 +0000
     3.3 @@ -40,6 +40,7 @@ struct bshdr_t_struct {
     3.4      u32            operation;
     3.5      u32            flags;
     3.6      u64            id;
     3.7 +    u64            luid;
     3.8  } __attribute__ ((packed));
     3.9  typedef struct bshdr_t_struct bshdr_t;
    3.10  
    3.11 @@ -52,12 +53,13 @@ typedef struct bsmsg_t_struct bsmsg_t;
    3.12  
    3.13  #define MSGBUFSIZE_OP    sizeof(u32)
    3.14  #define MSGBUFSIZE_FLAGS (sizeof(u32) + sizeof(u32))
    3.15 -#define MSGBUFSIZE_ID    (sizeof(u32) + sizeof(u32) + sizeof(u64))
    3.16 +#define MSGBUFSIZE_ID    (sizeof(u32) + sizeof(u32) + sizeof(u64) + sizeof(u64))
    3.17  #define MSGBUFSIZE_BLOCK sizeof(bsmsg_t)
    3.18  
    3.19  #define BSOP_READBLOCK  0x01
    3.20  #define BSOP_WRITEBLOCK 0x02
    3.21  #define BSOP_ALLOCBLOCK 0x03
    3.22 +#define BSOP_FREEBLOCK  0x04
    3.23  
    3.24  #define BSOP_FLAG_ERROR 0x01
    3.25