ia64/xen-unstable

view tools/vnet/vnetd/vcache.c @ 6946:e703abaf6e3d

Add behaviour to the remove methods to remove the transaction's path itself. This allows us to write Remove(path) to remove the specified path rather than having to slice the path ourselves.
author emellor@ewan
date Sun Sep 18 14:42:13 2005 +0100 (2005-09-18)
parents 3233e7ecfa9f
children 06d84bf87159
line source
1 /*
2 * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>.
3 *
4 * This library is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as
6 * published by the Free Software Foundation; either version 2.1 of the
7 * License, or (at your option) any later version. This library is
8 * distributed in the hope that it will be useful, but WITHOUT ANY
9 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE.
11 * See the GNU Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this library; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
18 #include <stdlib.h>
19 #include <unistd.h>
20 #include <stdio.h>
21 #include <getopt.h>
22 #include <errno.h>
23 #include <sys/types.h>
24 #include <time.h>
25 #include <sys/socket.h>
26 #include <netinet/in.h>
27 #include <arpa/inet.h>
28 #include <string.h>
30 #include "allocate.h"
31 #include "hash_table.h"
32 #include "sys_net.h"
33 #include "sys_string.h"
34 #include "connection.h"
35 #include "marshal.h"
36 #include "timer.h"
38 #undef offsetof
39 #include "vnetd.h"
40 #include "vcache.h"
42 #define MODULE_NAME "VARP"
43 #define DEBUG 1
44 #undef DEBUG
45 #include "debug.h"
47 #include "varp_util.c"
/* The varp cache used by the vcache_* entry points in this file.
 * Stays NULL until vcache_init() creates it.
 */
49 static VarpCache *vcache = NULL;
51 void IPMessageQueue_init(IPMessageQueue *queue, int maxlen){
52 queue->msg = NULL;
53 queue->len = 0;
54 queue->maxlen = maxlen;
55 }
57 void IPMessageQueue_clear(IPMessageQueue *queue){
58 queue->msg = NULL;
59 queue->len = 0;
60 }
62 void IPMessageQueue_truncate(IPMessageQueue *queue, int n){
63 IPMessage **p = &queue->msg;
64 int i;
65 for(i = 1; *p; p = &(*p)->next, i++){
66 if(i == n){
67 *p = NULL;
68 break;
69 }
70 }
71 }
73 void IPMessageQueue_add(IPMessageQueue *queue, IPMessage *msg){
74 msg->next = queue->msg;
75 queue->msg = msg;
76 queue->len++;
77 if(queue->len >= queue->maxlen){
78 IPMessageQueue_truncate(queue, queue->maxlen);
79 }
80 }
82 IPMessage * IPMessageQueue_pop(IPMessageQueue *queue){
83 IPMessage *msg = NULL;
84 if(queue->len > 0){
85 queue->len--;
86 msg = queue->msg;
87 queue->msg = msg->next;
88 msg->next = NULL;
89 }
90 return msg;
91 }
93 void VarpCache_sweep(VarpCache *z, int all);
95 /** Send a varp protocol message.
96 *
97 * @param opcode varp opcode (host order)
98 * @param vnet vnet id (in network order)
99 * @param vmac vmac (in network order)
100 * @return 0 on success, error code otherwise
101 */
102 int varp_send(Conn *conn, uint16_t opcode, VnetId *vnet, Vmac *vmac, VarpAddr *addr){
103 int err = 0;
104 int varp_n = sizeof(VarpHdr);
105 VarpHdr varph = {};
106 #ifdef DEBUG
107 char vnetbuf[VNET_ID_BUF];
108 char addrbuf[VARP_ADDR_BUF];
109 #endif
111 varph.hdr.id = htons(VARP_ID);
112 varph.hdr.opcode = htons(opcode);
113 varph.vnet = *vnet;
114 varph.vmac = *vmac;
115 varph.addr = *addr;
117 if(0){
118 struct sockaddr_in self;
119 socklen_t self_n;
120 getsockname(conn->sock, (struct sockaddr *)&self, &self_n);
121 dprintf("> sockname addr=%s port=%d\n",
122 inet_ntoa(self.sin_addr), ntohs(self.sin_port));
123 }
124 dprintf("> addr=%s opcode=%d\n",
125 inet_ntoa(conn->addr.sin_addr), opcode);
126 dprintf("> vnet=%s vmac=" MACFMT " addr=%s\n",
127 VnetId_ntoa(vnet, vnetbuf),
128 MAC6TUPLE(vmac->mac),
129 VarpAddr_ntoa(addr, addrbuf));
130 err = marshal_bytes(conn->out, &varph, varp_n);
131 marshal_flush(conn->out);
132 dprintf("< err=%d\n", err);
133 return err;
134 }
136 /* Test some flags.
137 *
138 * @param z varp entry
139 * @param flags to test
140 * @return nonzero if flags set
141 */
142 int VCEntry_get_flags(VCEntry *z, int flags){
143 return z->flags & flags;
144 }
146 /** Set some flags.
147 *
148 * @param z varp entry
149 * @param flags to set
150 * @param set set flags on if nonzero, off if zero
151 * @return new flags value
152 */
153 int VCEntry_set_flags(VCEntry *z, int flags, int set){
154 if(set){
155 z->flags |= flags;
156 } else {
157 z->flags &= ~flags;
158 }
159 return z->flags;
160 }
162 /** Print a varp entry.
163 *
164 * @param ventry varp entry
165 */
166 void VCEntry_print(VCEntry *ventry){
167 if(ventry){
168 char *state, *flags;
169 char vnetbuf[VNET_ID_BUF];
170 char addrbuf[VARP_ADDR_BUF];
172 switch(ventry->state){
173 case VCACHE_STATE_INCOMPLETE: state = "INC"; break;
174 case VCACHE_STATE_REACHABLE: state = "RCH"; break;
175 case VCACHE_STATE_FAILED: state = "FLD"; break;
176 default: state = "UNK"; break;
177 }
178 flags = (VCEntry_get_flags(ventry, VCACHE_FLAG_PROBING) ? "P" : " ");
180 printf("VENTRY(%p %s %s vnet=%s vmac=" MACFMT " addr=%s time=%g)\n",
181 ventry,
182 state, flags,
183 VnetId_ntoa(&ventry->key.vnet, vnetbuf),
184 MAC6TUPLE(ventry->key.vmac.mac),
185 VarpAddr_ntoa(&ventry->addr, addrbuf),
186 ventry->timestamp);
187 } else {
188 printf("VENTRY: Null!\n");
189 }
190 }
192 int VCEntry_schedule(VCEntry *ventry);
193 void VCEntry_solicit(VCEntry *ventry);
195 /** Function called when a varp entry timer goes off.
196 * If the entry is still incomplete, carries on probing.
197 * Otherwise stops probing.
198 *
199 * @param arg ventry
200 */
201 static void ventry_timer_fn(Timer *timer){
202 VCEntry *ventry = timer->data;
203 int probing = 0, scheduled = 0;
205 //dprintf(">\n"); VCEntry_print(ventry);
206 if(ventry->state == VCACHE_STATE_REACHABLE){
207 // Do nothing.
208 } else {
209 // Probe if haven't run out of tries, otherwise fail.
210 if(ventry->probes < VCACHE_PROBE_MAX){
211 //probing = 1;
212 ventry->probes++;
213 scheduled = VCEntry_schedule(ventry);
214 //VCEntry_solicit(ventry);
215 probing = scheduled;
216 } else {
217 ventry->state = VCACHE_STATE_FAILED;
218 IPMessageQueue_clear(&ventry->queue);
219 }
220 }
221 if(!probing){
222 VCEntry_set_flags(ventry,
223 (VCACHE_FLAG_PROBING
224 | VCACHE_FLAG_REMOTE_PROBE
225 | VCACHE_FLAG_LOCAL_PROBE),
226 0);
227 }
228 VCEntry_set_flags(ventry, VCACHE_FLAG_PROBING, probing);
229 //dprintf("<\n");
230 }
232 /** Schedule the varp entry timer.
233 *
234 * @param ventry varp entry
235 */
236 int VCEntry_schedule(VCEntry *ventry){
237 int scheduled = 0;
238 if(ventry->probes == 1){
239 scheduled = 1;
240 Timer_set(VCACHE_LOCAL_DELAY, ventry_timer_fn, ventry);
241 } else {
242 VCEntry_solicit(ventry);
243 }
244 return scheduled;
245 }
247 /** Create a varp entry. Initializes the internal state.
248 *
249 * @param vnet vnet id
250 * @param vmac virtual MAC address (copied)
251 * @return ventry or null
252 */
253 VCEntry * VCEntry_new(VnetId *vnet, Vmac *vmac){
254 VCEntry *z = ALLOCATE(VCEntry);
255 z->state = VCACHE_STATE_INCOMPLETE;
256 z->timestamp = time_now();
257 z->key.vnet = *vnet;
258 z->key.vmac = *vmac;
259 return z;
260 }
262 /** Hash function for keys in the varp cache.
263 * Hashes the vnet id and mac.
264 *
265 * @param k key (VCKey)
266 * @return hashcode
267 */
268 Hashcode vcache_key_hash_fn(void *k){
269 VCKey *key = k;
270 Hashcode h = 0;
271 h = VnetId_hash(h, &key->vnet);
272 h = Vmac_hash(h, &key->vmac);
273 return h;
274 }
276 /** Test equality for keys in the varp cache.
277 * Compares vnet and mac.
278 *
279 * @param k1 key to compare (VCKey)
280 * @param k2 key to compare (VCKey)
281 * @return 1 if equal, 0 otherwise
282 */
283 int vcache_key_equal_fn(void *k1, void *k2){
284 VCKey *key1 = k1;
285 VCKey *key2 = k2;
286 return (VnetId_eq(&key1->vnet , &key2->vnet) &&
287 Vmac_eq(&key1->vmac, &key2->vmac));
288 }
290 void VarpCache_schedule(VarpCache *z);
292 /** Function called when the varp table timer goes off.
293 * Sweeps old varp cache entries and reschedules itself.
294 *
295 * @param arg varp table
296 */
297 static void vcache_timer_fn(Timer *timer){
298 VarpCache *z = timer->data;
299 //dprintf("> z=%p\n", z);
300 if(z){
301 VarpCache_sweep(z, 0);
302 VarpCache_schedule(z);
303 }
304 //dprintf("<\n");
305 }
307 /** Schedule the varp table timer.
308 *
309 * @param z varp table
310 */
311 void VarpCache_schedule(VarpCache *z){
312 Timer_set(VCACHE_ENTRY_TTL, vcache_timer_fn, z);
313 }
315 /** Print a varp table.
316 *
317 * @param z table
318 */
319 void VarpCache_print(VarpCache *z){
320 HashTable_for_decl(entry);
321 VCEntry *ventry;
323 dprintf(">\n");
324 HashTable_for_each(entry, vcache->table){
325 ventry = entry->value;
326 VCEntry_print(ventry);
327 }
328 dprintf("<\n");
329 }
331 /** Print the varp cache.
332 */
333 void vcache_print(void){
334 VarpCache_print(vcache);
335 }
337 /** Create a varp table.
338 *
339 * @return new table or null
340 */
341 VarpCache * VarpCache_new(void){
342 VarpCache *z = NULL;
344 z = ALLOCATE(VarpCache);
345 z->table = HashTable_new(VCACHE_BUCKETS);
346 z->table->key_equal_fn = vcache_key_equal_fn;
347 z->table->key_hash_fn = vcache_key_hash_fn;
348 VarpCache_schedule(z);
349 return z;
350 }
352 /** Add a new entry to the varp table.
353 *
354 * @param z table
355 * @param vnet vnet id
356 * @param vmac virtual MAC address (copied)
357 * @return new entry or null
358 */
359 VCEntry * VarpCache_add(VarpCache *z, VnetId *vnet, Vmac *vmac){
360 VCEntry *ventry;
361 HTEntry *entry;
363 ventry = VCEntry_new(vnet, vmac);
364 //dprintf("> "); VCEntry_print(ventry);
365 entry = HashTable_add(z->table, ventry, ventry);
366 return ventry;
367 }
369 /** Remove an entry from the varp table.
370 *
371 * @param z table
372 * @param ventry entry to remove
373 * @return removed count
374 */
375 int VarpCache_remove(VarpCache *z, VCEntry *ventry){
376 return HashTable_remove(z->table, ventry);
377 }
379 /** Lookup an entry in the varp table.
380 *
381 * @param z table
382 * @param vnet vnet id
383 * @param vmac virtual MAC addres
384 * @return entry found or null
385 */
386 VCEntry * VarpCache_lookup(VarpCache *z, VnetId *vnet, Vmac *vmac){
387 VCKey key = { .vnet = *vnet, .vmac = *vmac };
388 VCEntry *ventry;
389 ventry = HashTable_get(z->table, &key);
390 return ventry;
391 }
393 void VCEntry_solicit(VCEntry *ventry){
394 dprintf(">\n");
395 if(VCEntry_get_flags(ventry, VCACHE_FLAG_LOCAL_PROBE)){
396 dprintf("> local probe\n");
397 varp_send(vnetd->bcast_conn, VARP_OP_REQUEST,
398 &ventry->key.vnet, &ventry->key.vmac, &ventry->addr);
399 }
400 if(VCEntry_get_flags(ventry, VCACHE_FLAG_REMOTE_PROBE)){
401 ConnList *l;
402 dprintf("> remote probe\n");
403 for(l = vnetd->connections; l; l = l->next){
404 varp_send(l->conn, VARP_OP_REQUEST,
405 &ventry->key.vnet, &ventry->key.vmac, &ventry->addr);
406 }
408 }
409 dprintf("<\n");
410 }
412 int VCEntry_resolve(VCEntry *ventry, IPMessage *msg, int flags){
413 int err = 0;
415 dprintf("> "); //VCEntry_print(ventry);
416 ventry->state = VCACHE_STATE_INCOMPLETE;
417 VCEntry_set_flags(ventry, flags, 1);
418 IPMessageQueue_add(&ventry->queue, msg);
419 if(!VCEntry_get_flags(ventry, VCACHE_FLAG_PROBING)){
420 VCEntry_set_flags(ventry, VCACHE_FLAG_PROBING, 1);
421 ventry->probes = 1;
422 VCEntry_schedule(ventry);
423 //VCEntry_solicit(ventry);
424 }
425 dprintf("< err=%d\n", err);
426 return err;
427 }
429 /** Update a ventry. Sets the address and state to those given
430 * and sets the timestamp to 'now'.
431 *
432 * @param ventry varp entry
433 * @param addr care-of address
434 * @param state state
435 * @return 0 on success, error code otherwise
436 */
437 int VCEntry_update(VCEntry *ventry, IPMessage *msg, VarpHdr *varph, int state){
438 int err = 0;
439 double now = time_now();
441 if(VCEntry_get_flags(ventry, VCACHE_FLAG_PERMANENT)) goto exit;
442 ventry->addr = varph->addr;
443 ventry->timestamp = now;
444 ventry->state = state;
445 if(ventry->state == VCACHE_STATE_REACHABLE){
446 // Process the output queue.
447 IPMessage *msg;
448 while((msg = IPMessageQueue_pop(&ventry->queue))){
449 dprintf("> announce\n");
450 varp_send(msg->conn, VARP_OP_ANNOUNCE,
451 &ventry->key.vnet, &ventry->key.vmac, &ventry->addr);
452 }
453 }
454 exit:
455 return err;
456 }
458 /** Update the ventry corresponding to the given varp header.
459 *
460 * @param z table
461 * @param varph varp header
462 * @param state state
463 * @return 0 on success, -ENOENT if no entry found
464 */
465 int VarpCache_update(VarpCache *z, IPMessage *msg, VarpHdr *varph, int state){
466 int err = 0;
467 VCEntry *ventry;
469 dprintf(">\n");
470 ventry = VarpCache_lookup(z, &varph->vnet, &varph->vmac);
471 if(ventry){
472 err = VCEntry_update(ventry, msg, varph, state);
473 } else {
474 err = -ENOENT;
475 }
476 dprintf("< err=%d\n", err);
477 return err;
478 }
481 /** Put old varp entries into the incomplete state.
482 * Permanent entries are not changed.
483 * If 'all' is non-zero, all non-permanent entries
484 * are put into the incomplete state, regardless of age.
485 *
486 * @param z table
487 * @param all reset all entries if non-zero
488 */
489 void VarpCache_sweep(VarpCache *z, int all){
490 HashTable_for_decl(entry);
491 VCEntry *ventry;
492 double now = time_now();
493 double old = now - VCACHE_ENTRY_TTL;
495 dprintf(">\n");
496 HashTable_for_each(entry, vcache->table){
497 ventry = entry->value;
498 if(!VCEntry_get_flags(ventry, VCACHE_FLAG_PERMANENT) &&
499 (all || (ventry->timestamp < old))){
500 ventry->state = VCACHE_STATE_INCOMPLETE;
501 }
502 }
503 dprintf("<\n");
504 }
506 /** Forward a varp message.
507 * If local forwards it to remote vnetds.
508 * If not local forwards it to local net.
509 *
510 * @param varph varp message to forward
511 * @param local whether it's local or not
512 */
513 void vcache_forward_varp(VarpHdr *varph, int local){
514 uint16_t opcode = ntohs(varph->hdr.opcode);
515 if(local){
516 ConnList *l;
517 for(l = vnetd->connections; l; l = l->next){
518 varp_send(l->conn, opcode, &varph->vnet, &varph->vmac, &varph->addr);
519 }
520 } else {
521 varp_send(vnetd->bcast_conn, opcode, &varph->vnet, &varph->vmac, &varph->addr);
522 }
523 }
525 /** Handle a varp request.
526 *
527 * @param msg incoming message
528 * @param varph varp message
529 * @return 0 if ok, -ENOENT if no matching vif, or error code
530 */
531 #if 1
532 int vcache_handle_request(IPMessage *msg, VarpHdr *varph, int local){
533 dprintf("> local=%d\n", local);
534 vcache_forward_varp(varph, local);
535 dprintf("<\n");
536 return 0;
537 }
539 #else
/* Disabled alternative implementation (the #else arm of "#if 1").
 * It looks up or creates the cache entry for the request and then either
 * replies with an announce or starts resolving the entry.
 * NOTE(review): this arm is stale - it calls varp_send() with three
 * arguments and uses 'conn', which is not declared in this function -
 * so it will not compile if re-enabled as it stands.
 */
540 int vcache_handle_request(IPMessage *msg, VarpHdr *varph, int local){
541 int err = -ENOENT;
542 VnetId *vnet;
543 Vmac *vmac;
544 VCEntry *ventry = NULL;
545 int reply = 0;
547 dprintf(">\n");
548 vnet = &varph->vnet;
549 vmac = &varph->vmac;
550 ventry = VarpCache_lookup(vcache, vnet, vmac);
551 if(!ventry){
552 ventry = VarpCache_add(vcache, vnet, vmac);
553 }
554 if(local){
555 // Request coming from the local subnet (on our udp port).
556 if(ventry->state == VCACHE_STATE_REACHABLE){
557 if(local){
558 // Have an entry, and it's non-local - reply (locally).
559 // Potential out-of-date cache problem.
560 // Should query remotely instead of replying.
561 varp_send(conn, VARP_OP_ANNOUNCE, ventry);
562 }
563 } else {
564 // Incomplete entry. Resolve.
565 VCEntry_resolve(ventry, msg, VCACHE_FLAG_REMOTE_PROBE);
566 }
567 } else {
568 // Non-local request (on one of our tcp connections).
569 if(ventry->state == VCACHE_STATE_REACHABLE){
570 if(local){
571 // Have an entry and it's local - reply (remotely).
572 // Potential out-of-date cache problem.
573 // Should query locally instead of replying.
574 varp_send(msg->conn, VARP_OP_ANNOUNCE, ventry);
575 } else {
576 // Have a non-local entry - do nothing and assume someone else
577 // will reply.
578 }
579 } else {
580 // Incomplete entry. Resolve.
581 VCEntry_resolve(ventry, msg, VCACHE_FLAG_LOCAL_PROBE);
582 }
583 }
584 exit:
585 dprintf("< err=%d\n", err);
586 return err;
587 }
588 #endif
590 /** Handle a varp announce message.
591 * Update the matching ventry if we have one.
592 *
593 * @param msg incoming message
594 * @param varp message
595 * @return 0 if OK, -ENOENT if no matching entry
596 */
597 int vcache_handle_announce(IPMessage *msg, VarpHdr *varph, int local){
598 int err = 0;
600 vcache_forward_varp(varph, local);
601 err = VarpCache_update(vcache, msg, varph, VCACHE_STATE_REACHABLE);
602 return err;
603 }
605 /** Handle an incoming varp message.
606 *
607 * @param msg incoming message
608 * @return 0 if OK, error code otherwise
609 */
610 int vcache_handle_message(IPMessage *msg, int local){
611 int err = -EINVAL;
612 VnetMsg *vmsg = msg->data;
613 VarpHdr *varph = &vmsg->varp.varph;
615 dprintf(">\n");
616 #ifdef DEBUG
617 {
618 char vnetbuf[VNET_ID_BUF];
619 dprintf("> src=%s:%d\n", inet_ntoa(msg->saddr.sin_addr), ntohs(msg->saddr.sin_port));
620 dprintf("> dst=%s:%d\n", inet_ntoa(msg->daddr.sin_addr), ntohs(msg->daddr.sin_port));
621 dprintf("> opcode=%d vnet=%s vmac=" MACFMT "\n",
622 ntohs(varph->opcode),
623 VnetId_ntoa(&varph->vnet, vnetbuf),
624 MAC6TUPLE(varph->vmac.mac));
625 }
626 #endif
627 switch(ntohs(varph->hdr.opcode)){
628 case VARP_OP_REQUEST:
629 err = vcache_handle_request(msg, varph, local);
630 break;
631 case VARP_OP_ANNOUNCE:
632 err = vcache_handle_announce(msg, varph, local);
633 break;
634 default:
635 break;
636 }
637 dprintf("< err=%d\n", err);
638 return err;
639 }
641 /** Initialize the varp cache.
642 *
643 * @return 0 on success, error code otherwise
644 */
645 int vcache_init(void){
646 int err = 0;
648 if(!vcache){
649 vcache = VarpCache_new();
650 }
651 return err;
652 }