ia64/xen-unstable
changeset 4162:63bff8d75218
bitkeeper revision 1.1236.35.1 (42371b91vqaZiam66I7Q_46q67kWeg)
Added blockstored and initial distributed parallax backend.
Added blockstored and initial distributed parallax backend.
author | jrb44@plym.cl.cam.ac.uk |
---|---|
date | Tue Mar 15 17:29:53 2005 +0000 (2005-03-15) |
parents | 095b6204d7f7 |
children | 95637bf4672a |
files | .rootkeys BitKeeper/etc/ignore BitKeeper/etc/logging_ok tools/blktap/Makefile tools/blktap/blockstore.c tools/blktap/blockstore.h tools/blktap/blockstored.c tools/blktap/bstest.c |
line diff
1.1 --- a/.rootkeys Mon Mar 14 11:34:53 2005 +0000 1.2 +++ b/.rootkeys Tue Mar 15 17:29:53 2005 +0000 1.3 @@ -341,6 +341,8 @@ 42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blk 1.4 42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h 1.5 42277b02WrfP1meTDPv1M5swFq8oHQ tools/blktap/blockstore.c 1.6 42277b02P1C0FYj3gqwTZUD8sxKCug tools/blktap/blockstore.h 1.7 +42371b8aL1JsxAXOd4bBhmZKDyjiJg tools/blktap/blockstored.c 1.8 +42371b8aD_x3L9MKsXciMNqkuk58eQ tools/blktap/bstest.c 1.9 42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile 1.10 42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c 1.11 42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c
2.1 --- a/BitKeeper/etc/ignore Mon Mar 14 11:34:53 2005 +0000 2.2 +++ b/BitKeeper/etc/ignore Tue Mar 15 17:29:53 2005 +0000 2.3 @@ -124,3 +124,5 @@ tools/blktap/vdi_validate 2.4 tools/blktap/xen/* 2.5 tools/cmdline/* 2.6 tools/tests/test_x86_emulator 2.7 +tools/blktap/blockstored 2.8 +tools/blktap/bstest
3.1 --- a/BitKeeper/etc/logging_ok Mon Mar 14 11:34:53 2005 +0000 3.2 +++ b/BitKeeper/etc/logging_ok Tue Mar 15 17:29:53 2005 +0000 3.3 @@ -33,6 +33,7 @@ iap10@pb001.cl.cam.ac.uk 3.4 iap10@pb007.cl.cam.ac.uk 3.5 iap10@striker.cl.cam.ac.uk 3.6 iap10@tetris.cl.cam.ac.uk 3.7 +jrb44@plym.cl.cam.ac.uk 3.8 jws22@gauntlet.cl.cam.ac.uk 3.9 jws@cairnwell.research 3.10 kaf24@camelot.eng.3leafnetworks.com
4.1 --- a/tools/blktap/Makefile Mon Mar 14 11:34:53 2005 +0000 4.2 +++ b/tools/blktap/Makefile Tue Mar 15 17:29:53 2005 +0000 4.3 @@ -141,6 +141,10 @@ vdi_fill: $(LIB) vdi_fill.c $(VDI_SRCS) 4.4 vdi_validate: $(LIB) vdi_validate.c $(VDI_SRCS) 4.5 $(CC) $(CFLAGS) -g3 -o vdi_validate vdi_validate.c $(VDI_SRCS) 4.6 4.7 +blockstored: blockstored.c 4.8 + $(CC) $(CFLAGS) -g3 -o blockstored blockstored.c 4.9 +bstest: bstest.c blockstore.c 4.10 + $(CC) $(CFLAGS) -g3 -o bstest bstest.c blockstore.c 4.11 4.12 rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS) 4.13 $(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS)
5.1 --- a/tools/blktap/blockstore.c Mon Mar 14 11:34:53 2005 +0000 5.2 +++ b/tools/blktap/blockstore.c Tue Mar 15 17:29:53 2005 +0000 5.3 @@ -15,6 +15,408 @@ 5.4 #include <sys/stat.h> 5.5 #include "blockstore.h" 5.6 5.7 +#define BLOCKSTORE_REMOTE 5.8 + 5.9 +#ifdef BLOCKSTORE_REMOTE 5.10 + 5.11 +//#define BSDEBUG 5.12 + 5.13 +#include <sys/socket.h> 5.14 +#include <sys/ioctl.h> 5.15 +#include <netinet/in.h> 5.16 +#include <netdb.h> 5.17 + 5.18 +#define ENTER_QUEUE_CR (void)0 5.19 +#define LEAVE_QUEUE_CR (void)0 5.20 + 5.21 +bsserver_t bsservers[MAX_SERVERS]; 5.22 +bscluster_t bsclusters[MAX_CLUSTERS]; 5.23 + 5.24 +struct sockaddr_in sin_local; 5.25 +int bssock = 0; 5.26 + 5.27 +typedef struct bsq_t_struct { 5.28 + struct bsq_t_struct *prev; 5.29 + struct bsq_t_struct *next; 5.30 + int server; 5.31 + int length; 5.32 + struct msghdr msghdr; 5.33 + struct iovec iov[2]; 5.34 + bshdr_t message; 5.35 + void *block; 5.36 +} bsq_t; 5.37 + 5.38 +bsq_t *bs_head = NULL; 5.39 +bsq_t *bs_tail = NULL; 5.40 + 5.41 +int send_message(bsq_t *qe) { 5.42 + int rc; 5.43 + 5.44 + qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin); 5.45 + qe->msghdr.msg_namelen = sizeof(struct sockaddr_in); 5.46 + qe->msghdr.msg_iov = qe->iov; 5.47 + if (qe->block) 5.48 + qe->msghdr.msg_iovlen = 2; 5.49 + else 5.50 + qe->msghdr.msg_iovlen = 1; 5.51 + qe->msghdr.msg_control = NULL; 5.52 + qe->msghdr.msg_controllen = 0; 5.53 + qe->msghdr.msg_flags = 0; 5.54 + 5.55 + qe->iov[0].iov_base = (void *)&(qe->message); 5.56 + qe->iov[0].iov_len = MSGBUFSIZE_ID; 5.57 + 5.58 + if (qe->block) { 5.59 + qe->iov[1].iov_base = qe->block; 5.60 + qe->iov[1].iov_len = BLOCK_SIZE; 5.61 + } 5.62 + 5.63 + rc = sendmsg(bssock, &(qe->msghdr), 0); 5.64 + //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0, 5.65 + // (struct sockaddr *)&(bsservers[qe->server].sin), 5.66 + // sizeof(struct sockaddr_in)); 5.67 + if (rc < 0) 5.68 + return rc; 5.69 + 5.70 + ENTER_QUEUE_CR; 5.71 + 5.72 + LEAVE_QUEUE_CR; 5.73 + 5.74 + return rc; 5.75 +} 5.76 + 5.77 +int recv_message(bsq_t *qe) { 5.78 + struct sockaddr_in from; 5.79 + //int flen = sizeof(from); 5.80 + int rc; 5.81 + 5.82 + qe->msghdr.msg_name = &from; 5.83 + qe->msghdr.msg_namelen = sizeof(struct sockaddr_in); 5.84 + qe->msghdr.msg_iov = qe->iov; 5.85 + if (qe->block) 5.86 + qe->msghdr.msg_iovlen = 2; 5.87 + else 5.88 + qe->msghdr.msg_iovlen = 1; 5.89 + qe->msghdr.msg_control = NULL; 5.90 + qe->msghdr.msg_controllen = 0; 5.91 + qe->msghdr.msg_flags = 0; 5.92 + 5.93 + qe->iov[0].iov_base = (void *)&(qe->message); 5.94 + qe->iov[0].iov_len = MSGBUFSIZE_ID; 5.95 + if (qe->block) { 5.96 + qe->iov[1].iov_base = qe->block; 5.97 + qe->iov[1].iov_len = BLOCK_SIZE; 5.98 + } 5.99 + 5.100 + rc = recvmsg(bssock, &(qe->msghdr), 0); 5.101 + 5.102 + //return recvfrom(bssock, (void *)&(qe->message), sizeof(bsmsg_t), 0, 5.103 + // (struct sockaddr *)&from, &flen); 5.104 + return rc; 5.105 +} 5.106 + 5.107 +void *readblock_indiv(int server, u64 id) { 5.108 + void *block; 5.109 + bsq_t *qe; 5.110 + int len; 5.111 + 5.112 + qe = (bsq_t *)malloc(sizeof(bsq_t)); 5.113 + if (!qe) { 5.114 + perror("readblock qe malloc"); 5.115 + return NULL; 5.116 + } 5.117 + qe->block = malloc(BLOCK_SIZE); 5.118 + if (!qe->block) { 5.119 + perror("readblock qe malloc"); 5.120 + free((void *)qe); 5.121 + return NULL; 5.122 + } 5.123 + 5.124 + qe->server = server; 5.125 + 5.126 + qe->message.operation = BSOP_READBLOCK; 5.127 + qe->message.flags = 0; 5.128 + qe->message.id = id; 5.129 + qe->length = MSGBUFSIZE_ID; 5.130 + 5.131 + if (send_message(qe) < 0) { 5.132 + perror("readblock sendto"); 5.133 + goto err; 5.134 + } 5.135 + 5.136 + len = recv_message(qe); 5.137 + if (len < 0) { 5.138 + perror("readblock recv"); 5.139 + goto err; 5.140 + } 5.141 + if ((qe->message.flags & BSOP_FLAG_ERROR)) { 5.142 + fprintf(stderr, "readblock server error\n"); 5.143 + goto err; 5.144 + } 5.145 + if (len < MSGBUFSIZE_BLOCK) { 5.146 + fprintf(stderr, "readblock recv short (%u)\n", len); 5.147 + goto err; 5.148 + } 5.149 + if ((block = malloc(BLOCK_SIZE)) == NULL) { 5.150 + perror("readblock malloc"); 5.151 + goto err; 5.152 + } 5.153 + //memcpy(block, qe->message.block, BLOCK_SIZE); 5.154 + block = qe->block; 5.155 + 5.156 + free((void *)qe); 5.157 + return block; 5.158 + 5.159 + err: 5.160 + free(qe->block); 5.161 + free((void *)qe); 5.162 + return NULL; 5.163 +} 5.164 + 5.165 +/** 5.166 + * readblock: read a block from disk 5.167 + * @id: block id to read 5.168 + * 5.169 + * @return: pointer to block, NULL on error 5.170 + */ 5.171 +void *readblock(u64 id) { 5.172 + int map = (int)BSID_MAP(id); 5.173 + u64 xid; 5.174 + static int i = CLUSTER_MAX_REPLICAS - 1; 5.175 + void *block = NULL; 5.176 + 5.177 + /* special case for the "superblock" just use the first block on the 5.178 + * first replica. (extend to blocks < 6 for vdi bug) 5.179 + */ 5.180 + if (id < 6) { 5.181 + block = readblock_indiv(bsclusters[map].servers[0], id); 5.182 + goto out; 5.183 + } 5.184 + 5.185 + i++; 5.186 + if (i >= CLUSTER_MAX_REPLICAS) 5.187 + i = 0; 5.188 + switch (i) { 5.189 + case 0: 5.190 + xid = BSID_REPLICA0(id); 5.191 + break; 5.192 + case 1: 5.193 + xid = BSID_REPLICA1(id); 5.194 + break; 5.195 + case 2: 5.196 + xid = BSID_REPLICA2(id); 5.197 + break; 5.198 + } 5.199 + 5.200 + block = readblock_indiv(bsclusters[map].servers[i], xid); 5.201 + 5.202 + out: 5.203 +#ifdef BSDEBUG 5.204 + if (block) 5.205 + fprintf(stderr, "READ: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n", 5.206 + id, 5.207 + (unsigned int)((unsigned char *)block)[0], 5.208 + (unsigned int)((unsigned char *)block)[1], 5.209 + (unsigned int)((unsigned char *)block)[2], 5.210 + (unsigned int)((unsigned char *)block)[3], 5.211 + (unsigned int)((unsigned char *)block)[4], 5.212 + (unsigned int)((unsigned char *)block)[5], 5.213 + (unsigned int)((unsigned char *)block)[6], 5.214 + (unsigned int)((unsigned char *)block)[7]); 5.215 + else 5.216 + fprintf(stderr, "READ: %016llx NULL\n", id); 5.217 +#endif 5.218 + return block; 5.219 +} 5.220 + 5.221 +int writeblock_indiv(int server, u64 id, void *block) { 5.222 + bsq_t *qe; 5.223 + int len; 5.224 + 5.225 + qe = (bsq_t *)malloc(sizeof(bsq_t)); 5.226 + if (!qe) { 5.227 + perror("writeblock qe malloc"); 5.228 + goto err; 5.229 + } 5.230 + qe->server = server; 5.231 + 5.232 + qe->message.operation = BSOP_WRITEBLOCK; 5.233 + qe->message.flags = 0; 5.234 + qe->message.id = id; 5.235 + //memcpy(qe->message.block, block, BLOCK_SIZE); 5.236 + qe->block = block; 5.237 + qe->length = MSGBUFSIZE_BLOCK; 5.238 + 5.239 + if (send_message(qe) < 0) { 5.240 + perror("writeblock sendto"); 5.241 + goto err; 5.242 + } 5.243 + 5.244 + len = recv_message(qe); 5.245 + if (len < 0) { 5.246 + perror("writeblock recv"); 5.247 + goto err; 5.248 + } 5.249 + if ((qe->message.flags & BSOP_FLAG_ERROR)) { 5.250 + fprintf(stderr, "writeblock server error\n"); 5.251 + goto err; 5.252 + } 5.253 + if (len < MSGBUFSIZE_ID) { 5.254 + fprintf(stderr, "writeblock recv short (%u)\n", len); 5.255 + goto err; 5.256 + } 5.257 + 5.258 + free((void *)qe); 5.259 + return 0; 5.260 + 5.261 + err: 5.262 + free((void *)qe); 5.263 + return -1; 5.264 +} 5.265 + 5.266 +/** 5.267 + * writeblock: write an existing block to disk 5.268 + * @id: block id 5.269 + * @block: pointer to block 5.270 + * 5.271 + * @return: zero on success, -1 on failure 5.272 + */ 5.273 +int writeblock(u64 id, void *block) { 5.274 + int map = (int)BSID_MAP(id); 5.275 + 5.276 + int rep0 = bsclusters[map].servers[0]; 5.277 + int rep1 = bsclusters[map].servers[1]; 5.278 + int rep2 = bsclusters[map].servers[2]; 5.279 + 5.280 +#ifdef BSDEBUG 5.281 + fprintf(stderr, 5.282 + "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n", 5.283 + id, 5.284 + (unsigned int)((unsigned char *)block)[0], 5.285 + (unsigned int)((unsigned char *)block)[1], 5.286 + (unsigned int)((unsigned char *)block)[2], 5.287 + (unsigned int)((unsigned char *)block)[3], 5.288 + (unsigned int)((unsigned char *)block)[4], 5.289 + (unsigned int)((unsigned char *)block)[5], 5.290 + (unsigned int)((unsigned char *)block)[6], 5.291 + (unsigned int)((unsigned char *)block)[7]); 5.292 +#endif 5.293 + 5.294 +/* special case for the "superblock" just use the first block on the 5.295 + * first replica. (extend to blocks < 6 for vdi bug) 5.296 + */ 5.297 + if (id < 6) { 5.298 + return writeblock_indiv(rep0, id, block); 5.299 + } 5.300 + 5.301 + if (writeblock_indiv(rep0, BSID_REPLICA0(id), block) < 0) 5.302 + return -1; 5.303 + if (writeblock_indiv(rep1, BSID_REPLICA1(id), block) < 0) 5.304 + return -1; 5.305 + if (writeblock_indiv(rep2, BSID_REPLICA2(id), block) < 0) 5.306 + return -1; 5.307 + return 0; 5.308 +} 5.309 + 5.310 +/** 5.311 + * allocblock: write a new block to disk 5.312 + * @block: pointer to block 5.313 + * 5.314 + * @return: new id of block on disk 5.315 + */ 5.316 +u64 allocblock(void *block) { 5.317 + return allocblock_hint(block, 0); 5.318 +} 5.319 + 5.320 +u64 allocblock_hint_indiv(int server, void *block, u64 hint) { 5.321 + bsq_t *qe; 5.322 + int len; 5.323 + 5.324 + qe = (bsq_t *)malloc(sizeof(bsq_t)); 5.325 + if (!qe) { 5.326 + perror("allocblock_hint qe malloc"); 5.327 + goto err; 5.328 + } 5.329 + qe->server = server; 5.330 + 5.331 + qe->message.operation = BSOP_ALLOCBLOCK; 5.332 + qe->message.flags = 0; 5.333 + qe->message.id = hint; 5.334 + //memcpy(qe->message.block, block, BLOCK_SIZE); 5.335 + qe->block = block; 5.336 + qe->length = MSGBUFSIZE_BLOCK; 5.337 + 5.338 + if (send_message(qe) < 0) { 5.339 + perror("allocblock_hint sendto"); 5.340 + goto err; 5.341 + } 5.342 + 5.343 + len = recv_message(qe); 5.344 + if (len < 0) { 5.345 + perror("allocblock_hint recv"); 5.346 + goto err; 5.347 + } 5.348 + if ((qe->message.flags & BSOP_FLAG_ERROR)) { 5.349 + fprintf(stderr, "allocblock_hint server error\n"); 5.350 + goto err; 5.351 + } 5.352 + if (len < MSGBUFSIZE_ID) { 5.353 + fprintf(stderr, "allocblock_hint recv short (%u)\n", len); 5.354 + goto err; 5.355 + } 5.356 + 5.357 + free((void *)qe); 5.358 + return qe->message.id; 5.359 + 5.360 + err: 5.361 + free((void *)qe); 5.362 + return 0; 5.363 +} 5.364 + 5.365 +/** 5.366 + * allocblock_hint: write a new block to disk 5.367 + * @block: pointer to block 5.368 + * @hint: allocation hint 5.369 + * 5.370 + * @return: new id of block on disk 5.371 + */ 5.372 +u64 allocblock_hint(void *block, u64 hint) { 5.373 + int map = (int)hint; 5.374 + 5.375 + int rep0 = bsclusters[map].servers[0]; 5.376 + int rep1 = bsclusters[map].servers[1]; 5.377 + int rep2 = bsclusters[map].servers[2]; 5.378 + 5.379 + u64 id0, id1, id2; 5.380 + 5.381 + id0 = allocblock_hint_indiv(rep0, block, 0); 5.382 + if (id0 == 0) 5.383 + return 0; 5.384 + id1 = allocblock_hint_indiv(rep1, block, 0); 5.385 + if (id1 == 0) 5.386 + return 0; 5.387 + id2 = allocblock_hint_indiv(rep2, block, 0); 5.388 + if (id2 == 0) 5.389 + return 0; 5.390 + 5.391 +#ifdef BSDEBUG 5.392 + fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n", 5.393 + BSID(map, id0, id1, id2), 5.394 + (unsigned int)((unsigned char *)block)[0], 5.395 + (unsigned int)((unsigned char *)block)[1], 5.396 + (unsigned int)((unsigned char *)block)[2], 5.397 + (unsigned int)((unsigned char *)block)[3], 5.398 + (unsigned int)((unsigned char *)block)[4], 5.399 + (unsigned int)((unsigned char *)block)[5], 5.400 + (unsigned int)((unsigned char *)block)[6], 5.401 + (unsigned int)((unsigned char *)block)[7]); 5.402 +#endif 5.403 + 5.404 + return BSID(map, id0, id1, id2); 5.405 +} 5.406 + 5.407 +#else /* /BLOCKSTORE_REMOTE */ 5.408 + 5.409 static int block_fp = -1; 5.410 5.411 /** 5.412 @@ -94,6 +496,18 @@ u64 allocblock(void *block) { 5.413 return lb; 5.414 } 5.415 5.416 +/** 5.417 + * allocblock_hint: write a new block to disk 5.418 + * @block: pointer to block 5.419 + * @hint: allocation hint 5.420 + * 5.421 + * @return: new id of block on disk 5.422 + */ 5.423 +u64 allocblock_hint(void *block, u64 hint) { 5.424 + return allocblock(block); 5.425 +} 5.426 + 5.427 +#endif /* BLOCKSTORE_REMOTE */ 5.428 5.429 /** 5.430 * newblock: get a new in-memory block set to zeros 5.431 @@ -124,12 +538,92 @@ void freeblock(void *block) { 5.432 5.433 int __init_blockstore(void) 5.434 { 5.435 +#ifdef BLOCKSTORE_REMOTE 5.436 + struct hostent *addr; 5.437 + int i; 5.438 + 5.439 + bsservers[0].hostname = "firebug.cl.cam.ac.uk"; 5.440 + bsservers[1].hostname = "tetris.cl.cam.ac.uk"; 5.441 + bsservers[2].hostname = "donkeykong.cl.cam.ac.uk"; 5.442 + bsservers[3].hostname = "gunfighter.cl.cam.ac.uk"; 5.443 + bsservers[4].hostname = "galaxian.cl.cam.ac.uk"; 5.444 + bsservers[5].hostname = "firetrack.cl.cam.ac.uk"; 5.445 + bsservers[6].hostname = "funfair.cl.cam.ac.uk"; 5.446 + bsservers[7].hostname = "felix.cl.cam.ac.uk"; 5.447 + bsservers[8].hostname = NULL; 5.448 + bsservers[9].hostname = NULL; 5.449 + bsservers[10].hostname = NULL; 5.450 + bsservers[11].hostname = NULL; 5.451 + bsservers[12].hostname = NULL; 5.452 + bsservers[13].hostname = NULL; 5.453 + bsservers[14].hostname = NULL; 5.454 + bsservers[15].hostname = NULL; 5.455 + 5.456 + for (i = 0; i < MAX_SERVERS; i++) { 5.457 + if (!bsservers[i].hostname) 5.458 + continue; 5.459 + addr = gethostbyname(bsservers[i].hostname); 5.460 + if (!addr) { 5.461 + perror("bad hostname"); 5.462 + return -1; 5.463 + } 5.464 + bsservers[i].sin.sin_family = addr->h_addrtype; 5.465 + bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT); 5.466 + bsservers[i].sin.sin_addr.s_addr = 5.467 + ((struct in_addr *)(addr->h_addr))->s_addr; 5.468 + } 5.469 + 5.470 + /* Cluster map 5.471 + */ 5.472 + bsclusters[0].servers[0] = 0; 5.473 + bsclusters[0].servers[1] = 1; 5.474 + bsclusters[0].servers[2] = 2; 5.475 + bsclusters[1].servers[0] = 1; 5.476 + bsclusters[1].servers[1] = 2; 5.477 + bsclusters[1].servers[2] = 3; 5.478 + bsclusters[2].servers[0] = 2; 5.479 + bsclusters[2].servers[1] = 3; 5.480 + bsclusters[2].servers[2] = 4; 5.481 + bsclusters[3].servers[0] = 3; 5.482 + bsclusters[3].servers[1] = 4; 5.483 + bsclusters[3].servers[2] = 5; 5.484 + bsclusters[4].servers[0] = 4; 5.485 + bsclusters[4].servers[1] = 5; 5.486 + bsclusters[4].servers[2] = 6; 5.487 + bsclusters[5].servers[0] = 5; 5.488 + bsclusters[5].servers[1] = 6; 5.489 + bsclusters[5].servers[2] = 7; 5.490 + bsclusters[6].servers[0] = 6; 5.491 + bsclusters[6].servers[1] = 7; 5.492 + bsclusters[6].servers[2] = 0; 5.493 + bsclusters[7].servers[0] = 7; 5.494 + bsclusters[7].servers[1] = 0; 5.495 + bsclusters[7].servers[2] = 1; 5.496 + 5.497 + /* Local socket set up 5.498 + */ 5.499 + bssock = socket(AF_INET, SOCK_DGRAM, 0); 5.500 + if (bssock < 0) { 5.501 + perror("Bad socket"); 5.502 + return -1; 5.503 + } 5.504 + memset(&sin_local, 0, sizeof(sin_local)); 5.505 + sin_local.sin_family = AF_INET; 5.506 + sin_local.sin_port = htons(BLOCKSTORED_PORT); 5.507 + sin_local.sin_addr.s_addr = htonl(INADDR_ANY); 5.508 + if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) { 5.509 + perror("bind"); 5.510 + close(bssock); 5.511 + return -1; 5.512 + } 5.513 + 5.514 +#else /* /BLOCKSTORE_REMOTE */ 5.515 block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644); 5.516 5.517 if (block_fp < 0) { 5.518 perror("open"); 5.519 return -1; 5.520 } 5.521 - 5.522 +#endif /* BLOCKSTORE_REMOTE */ 5.523 return 0; 5.524 }
6.1 --- a/tools/blktap/blockstore.h Mon Mar 14 11:34:53 2005 +0000 6.2 +++ b/tools/blktap/blockstore.h Tue Mar 15 17:29:53 2005 +0000 6.3 @@ -9,6 +9,7 @@ 6.4 #ifndef __BLOCKSTORE_H__ 6.5 #define __BLOCKSTORE_H__ 6.6 6.7 +#include <netinet/in.h> 6.8 #include <xc.h> 6.9 6.10 #define BLOCK_SIZE 4096 6.11 @@ -24,8 +25,83 @@ 6.12 extern void *newblock(); 6.13 extern void *readblock(u64 id); 6.14 extern u64 allocblock(void *block); 6.15 +extern u64 allocblock_hint(void *block, u64 hint); 6.16 extern int writeblock(u64 id, void *block); 6.17 extern void freeblock(void *block); 6.18 extern int __init_blockstore(void); 6.19 6.20 +#define ALLOCFAIL (((u64)(-1))) 6.21 + 6.22 +/* Distribution 6.23 + */ 6.24 +#define BLOCKSTORED_PORT 9346 6.25 + 6.26 +struct bshdr_t_struct { 6.27 + u32 operation; 6.28 + u32 flags; 6.29 + u64 id; 6.30 +} __attribute__ ((packed)); 6.31 +typedef struct bshdr_t_struct bshdr_t; 6.32 + 6.33 +struct bsmsg_t_struct { 6.34 + bshdr_t hdr; 6.35 + unsigned char block[BLOCK_SIZE]; 6.36 +} __attribute__ ((packed)); 6.37 + 6.38 +typedef struct bsmsg_t_struct bsmsg_t; 6.39 + 6.40 +#define MSGBUFSIZE_OP sizeof(u32) 6.41 +#define MSGBUFSIZE_FLAGS (sizeof(u32) + sizeof(u32)) 6.42 +#define MSGBUFSIZE_ID (sizeof(u32) + sizeof(u32) + sizeof(u64)) 6.43 +#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t) 6.44 + 6.45 +#define BSOP_READBLOCK 0x01 6.46 +#define BSOP_WRITEBLOCK 0x02 6.47 +#define BSOP_ALLOCBLOCK 0x03 6.48 + 6.49 +#define BSOP_FLAG_ERROR 0x01 6.50 + 6.51 +#define BS_ALLOC_SKIP 10 6.52 +#define BS_ALLOC_HACK 6.53 + 6.54 +/* Remote hosts and cluster map - XXX need to generalise 6.55 + */ 6.56 + 6.57 +/* 6.58 + 6.59 + Interim ID format is 6.60 + 6.61 + 63 60 59 40 39 20 19 0 6.62 + +----+--------------------+--------------------+--------------------+ 6.63 + |map | replica 2 | replica 1 | replica 0 | 6.64 + +----+--------------------+--------------------+--------------------+ 6.65 + 6.66 + The map is an index into a table detailing which machines form the 6.67 + cluster. 6.68 + 6.69 + */ 6.70 + 6.71 +#define BSID_REPLICA0(_id) ((_id)&0xfffffULL) 6.72 +#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL) 6.73 +#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL) 6.74 +#define BSID_MAP(_id) (((_id)>>60)&0xfULL) 6.75 + 6.76 +#define BSID(_map, _rep0, _rep1, _rep2) ((((u64)(_map))<<60) | \ 6.77 + (((u64)(_rep2))<<40) | \ 6.78 + (((u64)(_rep1))<<20) | ((u64)(_rep0))) 6.79 + 6.80 +typedef struct bsserver_t_struct { 6.81 + char *hostname; 6.82 + struct sockaddr_in sin; 6.83 +} bsserver_t; 6.84 + 6.85 +#define MAX_SERVERS 16 6.86 + 6.87 +#define CLUSTER_MAX_REPLICAS 3 6.88 +typedef struct bscluster_t_struct { 6.89 + int servers[CLUSTER_MAX_REPLICAS]; 6.90 +} bscluster_t; 6.91 + 6.92 +#define MAX_CLUSTERS 16 6.93 + 6.94 #endif /* __BLOCKSTORE_H__ */
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/tools/blktap/blockstored.c Tue Mar 15 17:29:53 2005 +0000 7.3 @@ -0,0 +1,276 @@ 7.4 +/************************************************************************** 7.5 + * 7.6 + * blockstored.c 7.7 + * 7.8 + * Block store daemon. 7.9 + * 7.10 + */ 7.11 + 7.12 +#include <fcntl.h> 7.13 +#include <unistd.h> 7.14 +#include <stdio.h> 7.15 +#include <stdlib.h> 7.16 +#include <string.h> 7.17 +#include <sys/types.h> 7.18 +#include <sys/stat.h> 7.19 +#include <sys/socket.h> 7.20 +#include <sys/ioctl.h> 7.21 +#include <netinet/in.h> 7.22 +#include <errno.h> 7.23 +#include "blockstore.h" 7.24 + 7.25 +//#define BSDEBUG 7.26 + 7.27 +int readblock_into(u64 id, void *block); 7.28 + 7.29 +int open_socket(u16 port) { 7.30 + 7.31 + struct sockaddr_in sn; 7.32 + int sock; 7.33 + 7.34 + sock = socket(AF_INET, SOCK_DGRAM, 0); 7.35 + if (sock < 0) { 7.36 + perror("Bad socket"); 7.37 + return -1; 7.38 + } 7.39 + memset(&sn, 0, sizeof(sn)); 7.40 + sn.sin_family = AF_INET; 7.41 + sn.sin_port = htons(port); 7.42 + sn.sin_addr.s_addr = htonl(INADDR_ANY); 7.43 + if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) { 7.44 + perror("bind"); 7.45 + close(sock); 7.46 + return -1; 7.47 + } 7.48 + 7.49 + return sock; 7.50 +} 7.51 + 7.52 +static int block_fp = -1; 7.53 +static int bssock = -1; 7.54 + 7.55 +int send_reply(struct sockaddr_in *peer, void *buffer, int len) { 7.56 + 7.57 + int rc; 7.58 + 7.59 +#ifdef BSDEBUG 7.60 + fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n", 7.61 + len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id); 7.62 +#endif 7.63 + rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer)); 7.64 + if (rc < 0) { 7.65 + perror("send_reply"); 7.66 + return 1; 7.67 + } 7.68 + 7.69 + 7.70 + return 0; 7.71 +} 7.72 + 7.73 +static bsmsg_t msgbuf; 7.74 + 7.75 +void service_loop(void) { 7.76 + 7.77 + for (;;) { 7.78 + int rc, len; 7.79 + struct sockaddr_in from; 7.80 + size_t slen = sizeof(from); 7.81 + u64 bid; 7.82 + 7.83 + len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0, 7.84 + (struct sockaddr *)&from, &slen); 7.85 + 7.86 + if (len < 0) { 7.87 + perror("recvfrom"); 7.88 + continue; 7.89 + } 7.90 + 7.91 + if (len < MSGBUFSIZE_OP) { 7.92 + fprintf(stderr, "Short packet.\n"); 7.93 + continue; 7.94 + } 7.95 + 7.96 +#ifdef BSDEBUG 7.97 + fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n", 7.98 + len, msgbuf.hdr.operation, msgbuf.hdr.id); 7.99 +#endif 7.100 + 7.101 + switch (msgbuf.hdr.operation) { 7.102 + case BSOP_READBLOCK: 7.103 + if (len < MSGBUFSIZE_ID) { 7.104 + fprintf(stderr, "Short packet (readblock %u).\n", len); 7.105 + continue; 7.106 + } 7.107 + rc = readblock_into(msgbuf.hdr.id, msgbuf.block); 7.108 + if (rc < 0) { 7.109 + fprintf(stderr, "readblock error\n"); 7.110 + msgbuf.hdr.flags = BSOP_FLAG_ERROR; 7.111 + send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); 7.112 + continue; 7.113 + } 7.114 + msgbuf.hdr.flags = 0; 7.115 + send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK); 7.116 + break; 7.117 + case BSOP_WRITEBLOCK: 7.118 + if (len < MSGBUFSIZE_BLOCK) { 7.119 + fprintf(stderr, "Short packet (writeblock %u).\n", len); 7.120 + continue; 7.121 + } 7.122 + rc = writeblock(msgbuf.hdr.id, msgbuf.block); 7.123 + if (rc < 0) { 7.124 + fprintf(stderr, "writeblock error\n"); 7.125 + msgbuf.hdr.flags = BSOP_FLAG_ERROR; 7.126 + send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); 7.127 + continue; 7.128 + } 7.129 + msgbuf.hdr.flags = 0; 7.130 + send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); 7.131 + break; 7.132 + case BSOP_ALLOCBLOCK: 7.133 + if (len < MSGBUFSIZE_BLOCK) { 7.134 + fprintf(stderr, "Short packet (allocblock %u).\n", len); 7.135 + continue; 7.136 + } 7.137 + bid = allocblock(msgbuf.block); 7.138 + if (bid == ALLOCFAIL) { 7.139 + fprintf(stderr, "allocblock error\n"); 7.140 + msgbuf.hdr.flags = BSOP_FLAG_ERROR; 7.141 + send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); 7.142 + continue; 7.143 + } 7.144 + msgbuf.hdr.id = bid; 7.145 + msgbuf.hdr.flags = 0; 7.146 + send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); 7.147 + break; 7.148 + } 7.149 + 7.150 + } 7.151 +} 7.152 + 7.153 +/** 7.154 + * readblock: read a block from disk 7.155 + * @id: block id to read 7.156 + * @block: pointer to buffer to receive block 7.157 + * 7.158 + * @return: 0 if OK, other on error 7.159 + */ 7.160 + 7.161 +int readblock_into(u64 id, void *block) { 7.162 + if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { 7.163 + printf ("%Ld\n", (id - 1) * BLOCK_SIZE); 7.164 + perror("readblock lseek"); 7.165 + return -1; 7.166 + } 7.167 + if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { 7.168 + perror("readblock read"); 7.169 + return -1; 7.170 + } 7.171 + return 0; 7.172 +} 7.173 + 7.174 +/** 7.175 + * writeblock: write an existing block to disk 7.176 + * @id: block id 7.177 + * @block: pointer to block 7.178 + * 7.179 + * @return: zero on success, -1 on failure 7.180 + */ 7.181 +int writeblock(u64 id, void *block) { 7.182 + if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { 7.183 + perror("writeblock lseek"); 7.184 + return -1; 7.185 + } 7.186 + if (write(block_fp, block, BLOCK_SIZE) < 0) { 7.187 + perror("writeblock write"); 7.188 + return -1; 7.189 + } 7.190 + return 0; 7.191 +} 7.192 + 7.193 +/** 7.194 + * allocblock: write a new block to disk 7.195 + * @block: pointer to block 7.196 + * 7.197 + * @return: new id of block on disk 7.198 + */ 7.199 +static u64 lastblock = 0; 7.200 + 7.201 +u64 allocblock(void *block) { 7.202 + u64 lb; 7.203 + off64_t pos; 7.204 + 7.205 + retry: 7.206 + pos = lseek64(block_fp, 0, SEEK_END); 7.207 + if (pos == (off64_t)-1) { 7.208 + perror("allocblock lseek"); 7.209 + return ALLOCFAIL; 7.210 + } 7.211 + if (pos % BLOCK_SIZE != 0) { 7.212 + fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE); 7.213 + return ALLOCFAIL; 7.214 + } 7.215 + if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { 7.216 + perror("allocblock write"); 7.217 + return ALLOCFAIL; 7.218 + } 7.219 + lb = pos / BLOCK_SIZE + 1; 7.220 + 7.221 +#ifdef BS_ALLOC_HACK 7.222 + if (lb < BS_ALLOC_SKIP) 7.223 + goto retry; 7.224 +#endif 7.225 + 7.226 + if (lb <= lastblock) 7.227 + printf("[*** %Ld alredy allocated! ***]\n", lb); 7.228 + 7.229 + lastblock = lb; 7.230 + return lb; 7.231 +} 7.232 + 7.233 +/** 7.234 + * newblock: get a new in-memory block set to zeros 7.235 + * 7.236 + * @return: pointer to new block, NULL on error 7.237 + */ 7.238 +void *newblock() { 7.239 + void *block = malloc(BLOCK_SIZE); 7.240 + if (block == NULL) { 7.241 + perror("newblock"); 7.242 + return NULL; 7.243 + } 7.244 + memset(block, 0, BLOCK_SIZE); 7.245 + return block; 7.246 +} 7.247 + 7.248 + 7.249 +/** 7.250 + * freeblock: unallocate an in-memory block 7.251 + * @id: block id (zero if this is only in-memory) 7.252 + * @block: block to be freed 7.253 + */ 7.254 +void freeblock(void *block) { 7.255 + if (block != NULL) 7.256 + free(block); 7.257 +} 7.258 + 7.259 + 7.260 +int main(int argc, char **argv) 7.261 +{ 7.262 + block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644); 7.263 + 7.264 + if (block_fp < 0) { 7.265 + perror("open"); 7.266 + return -1; 7.267 + } 7.268 + 7.269 + bssock = open_socket(BLOCKSTORED_PORT); 7.270 + if (bssock < 0) { 7.271 + return -1; 7.272 + } 7.273 + 7.274 + service_loop(); 7.275 + 7.276 + close(bssock); 7.277 + 7.278 + return 0; 7.279 +}
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/tools/blktap/bstest.c Tue Mar 15 17:29:53 2005 +0000 8.3 @@ -0,0 +1,191 @@ 8.4 +/************************************************************************** 8.5 + * 8.6 + * bstest.c 8.7 + * 8.8 + * Block store daemon test program. 8.9 + * 8.10 + * usage: bstest <host>|X {r|w|a} ID 8.11 + * 8.12 + */ 8.13 + 8.14 +#include <fcntl.h> 8.15 +#include <unistd.h> 8.16 +#include <stdio.h> 8.17 +#include <stdlib.h> 8.18 +#include <string.h> 8.19 +#include <sys/types.h> 8.20 +#include <sys/stat.h> 8.21 +#include <sys/socket.h> 8.22 +#include <sys/ioctl.h> 8.23 +#include <netinet/in.h> 8.24 +#include <netdb.h> 8.25 +#include <errno.h> 8.26 +#include "blockstore.h" 8.27 + 8.28 +int direct(char *host, u32 op, u64 id, int len) { 8.29 + struct sockaddr_in sn, peer; 8.30 + int sock; 8.31 + bsmsg_t msgbuf; 8.32 + int rc, slen; 8.33 + struct hostent *addr; 8.34 + 8.35 + addr = gethostbyname(host); 8.36 + if (!addr) { 8.37 + perror("bad hostname"); 8.38 + exit(1); 8.39 + } 8.40 + peer.sin_family = addr->h_addrtype; 8.41 + peer.sin_port = htons(BLOCKSTORED_PORT); 8.42 + peer.sin_addr.s_addr = ((struct in_addr *)(addr->h_addr))->s_addr; 8.43 + fprintf(stderr, "Sending to: %u.%u.%u.%u\n", 8.44 + (unsigned int)(unsigned char)addr->h_addr[0], 8.45 + (unsigned int)(unsigned char)addr->h_addr[1], 8.46 + (unsigned int)(unsigned char)addr->h_addr[2], 8.47 + (unsigned int)(unsigned char)addr->h_addr[3]); 8.48 + 8.49 + sock = socket(AF_INET, SOCK_DGRAM, 0); 8.50 + if (sock < 0) { 8.51 + perror("Bad socket"); 8.52 + exit(1); 8.53 + } 8.54 + memset(&sn, 0, sizeof(sn)); 8.55 + sn.sin_family = AF_INET; 8.56 + sn.sin_port = htons(BLOCKSTORED_PORT); 8.57 + sn.sin_addr.s_addr = htonl(INADDR_ANY); 8.58 + if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) { 8.59 + perror("bind"); 8.60 + close(sock); 8.61 + exit(1); 8.62 + } 8.63 + 8.64 + memset((void *)&msgbuf, 0, sizeof(msgbuf)); 8.65 + msgbuf.operation = op; 8.66 + msgbuf.id = id; 8.67 + 8.68 + rc = sendto(sock, (void *)&msgbuf, len, 0, 8.69 + (struct sockaddr *)&peer, sizeof(peer)); 8.70 + if (rc < 0) { 8.71 + perror("sendto"); 8.72 + exit(1); 8.73 + } 8.74 + 8.75 + slen = sizeof(peer); 8.76 + len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0, 8.77 + (struct sockaddr *)&peer, &slen); 8.78 + if (len < 0) { 8.79 + perror("recvfrom"); 8.80 + exit(1); 8.81 + } 8.82 + 8.83 + printf("Reply %u bytes:\n", len); 8.84 + if (len >= MSGBUFSIZE_OP) 8.85 + printf(" operation: %u\n", msgbuf.operation); 8.86 + if (len >= MSGBUFSIZE_FLAGS) 8.87 + printf(" flags: 0x%x\n", msgbuf.flags); 8.88 + if (len >= MSGBUFSIZE_ID) 8.89 + printf(" id: %llu\n", msgbuf.id); 8.90 + if (len >= (MSGBUFSIZE_ID + 4)) 8.91 + printf(" data: %02x %02x %02x %02x...\n", 8.92 + (unsigned int)msgbuf.block[0], 8.93 + (unsigned int)msgbuf.block[1], 8.94 + (unsigned int)msgbuf.block[2], 8.95 + (unsigned int)msgbuf.block[3]); 8.96 + 8.97 + if (sock > 0) 8.98 + close(sock); 8.99 + 8.100 + return 0; 8.101 +} 8.102 + 8.103 +int main (int argc, char **argv) { 8.104 + 8.105 + u32 op = 0; 8.106 + u64 id = 0; 8.107 + int len = 0, rc; 8.108 + void *block; 8.109 + 8.110 + if (argc < 3) { 8.111 + fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n"); 8.112 + return 1; 8.113 + } 8.114 + 8.115 + switch (argv[2][0]) { 8.116 + case 'r': 8.117 + case 'R': 8.118 + op = BSOP_READBLOCK; 8.119 + len = MSGBUFSIZE_ID; 8.120 + break; 8.121 + case 'w': 8.122 + case 'W': 8.123 + op = BSOP_WRITEBLOCK; 8.124 + len = MSGBUFSIZE_BLOCK; 8.125 + break; 8.126 + case 'a': 8.127 + case 'A': 8.128 + op = BSOP_ALLOCBLOCK; 8.129 + len = MSGBUFSIZE_BLOCK; 8.130 + break; 8.131 + default: 8.132 + fprintf(stderr, "Unknown action '%s'.\n", argv[2]); 8.133 + return 1; 8.134 + } 8.135 + 8.136 + if (argc >= 4) 8.137 + id = atoll(argv[3]); 8.138 + 8.139 + if (strcmp(argv[1], "X") == 0) { 8.140 + rc = __init_blockstore(); 8.141 + if (rc < 0) { 8.142 + fprintf(stderr, "blockstore init failed.\n"); 8.143 + return 1; 8.144 + } 8.145 + switch(op) { 8.146 + case BSOP_READBLOCK: 8.147 + block = readblock(id); 8.148 + if (block) { 8.149 + printf("data: %02x %02x %02x %02x...\n", 8.150 + (unsigned int)((unsigned char*)block)[0], 8.151 + (unsigned int)((unsigned char*)block)[1], 8.152 + (unsigned int)((unsigned char*)block)[2], 8.153 + (unsigned int)((unsigned char*)block)[3]); 8.154 + } 8.155 + break; 8.156 + case BSOP_WRITEBLOCK: 8.157 + block = malloc(BLOCK_SIZE); 8.158 + if (!block) { 8.159 + perror("bstest malloc"); 8.160 + return 1; 8.161 + } 8.162 + memset(block, 0, BLOCK_SIZE); 8.163 + rc = writeblock(id, block); 8.164 + if (rc != 0) { 8.165 + printf("error\n"); 8.166 + } 8.167 + else { 8.168 + printf("OK\n"); 8.169 + } 8.170 + break; 8.171 + case BSOP_ALLOCBLOCK: 8.172 + block = malloc(BLOCK_SIZE); 8.173 + if (!block) { 8.174 + perror("bstest malloc"); 8.175 + return 1; 8.176 + } 8.177 + memset(block, 0, BLOCK_SIZE); 8.178 + id = allocblock_hint(block, id); 8.179 + if (id == 0) { 8.180 + printf("error\n"); 8.181 + } 8.182 + else { 8.183 + printf("ID: %llu\n", id); 8.184 + } 8.185 + break; 8.186 + } 8.187 + } 8.188 + else { 8.189 + direct(argv[1], op, id, len); 8.190 + } 8.191 + 8.192 + 8.193 + return 0; 8.194 +}