ia64/xen-unstable

view tools/vnet/vnetd/vcache.c @ 8740:3d7ea7972b39

Update patches for linux 2.6.15.

Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Thu Feb 02 17:16:00 2006 +0000 (2006-02-02)
parents 06d84bf87159
children
line source
1 /*
2 * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>.
3 *
4 * This library is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as
6 * published by the Free Software Foundation; either version 2.1 of the
7 * License, or (at your option) any later version. This library is
8 * distributed in the hope that it will be useful, but WITHOUT ANY
9 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE.
11 * See the GNU Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this library; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
18 #include <stdlib.h>
19 #include <unistd.h>
20 #include <stdio.h>
21 #include <getopt.h>
22 #include <errno.h>
23 #include <sys/types.h>
24 #include <time.h>
25 #include <sys/socket.h>
26 #include <netinet/in.h>
27 #include <arpa/inet.h>
28 #include <string.h>
30 #include "allocate.h"
31 #include "hash_table.h"
32 #include "sys_net.h"
33 #include "sys_string.h"
34 #include "connection.h"
35 #include "marshal.h"
36 #include "timer.h"
38 #undef offsetof
39 #include "vnetd.h"
40 #include "vcache.h"
42 #define MODULE_NAME "VARP"
43 #define DEBUG 1
44 #undef DEBUG
45 #include "debug.h"
47 #include "varp_util.c"
49 static VarpCache *vcache = NULL;
51 void IPMessageQueue_init(IPMessageQueue *queue, int maxlen){
52 queue->msg = NULL;
53 queue->len = 0;
54 queue->maxlen = maxlen;
55 }
57 void IPMessageQueue_clear(IPMessageQueue *queue){
58 queue->msg = NULL;
59 queue->len = 0;
60 }
62 void IPMessageQueue_truncate(IPMessageQueue *queue, int n){
63 IPMessage **p = &queue->msg;
64 int i;
65 for(i = 1; *p; p = &(*p)->next, i++){
66 if(i == n){
67 *p = NULL;
68 break;
69 }
70 }
71 }
73 void IPMessageQueue_add(IPMessageQueue *queue, IPMessage *msg){
74 msg->next = queue->msg;
75 queue->msg = msg;
76 queue->len++;
77 if(queue->len >= queue->maxlen){
78 IPMessageQueue_truncate(queue, queue->maxlen);
79 }
80 }
82 IPMessage * IPMessageQueue_pop(IPMessageQueue *queue){
83 IPMessage *msg = NULL;
84 if(queue->len > 0){
85 queue->len--;
86 msg = queue->msg;
87 queue->msg = msg->next;
88 msg->next = NULL;
89 }
90 return msg;
91 }
93 void VarpCache_sweep(VarpCache *z, int all);
95 /** Send a varp protocol message.
96 *
97 * @param opcode varp opcode (host order)
98 * @param vnet vnet id (in network order)
99 * @param vmac vmac (in network order)
100 * @return 0 on success, error code otherwise
101 */
102 int varp_send(Conn *conn, uint16_t opcode, VnetId *vnet, Vmac *vmac, VarpAddr *addr){
103 int err = 0;
104 int varp_n = sizeof(VarpHdr);
105 VarpHdr varph = {};
106 #ifdef DEBUG
107 char vnetbuf[VNET_ID_BUF];
108 char addrbuf[VARP_ADDR_BUF];
109 #endif
111 varph.hdr.id = htons(VARP_ID);
112 varph.hdr.opcode = htons(opcode);
113 varph.vnet = *vnet;
114 varph.vmac = *vmac;
115 varph.addr = *addr;
117 if(0){
118 struct sockaddr_in self;
119 socklen_t self_n;
120 getsockname(conn->sock, (struct sockaddr *)&self, &self_n);
121 dprintf("> sockname addr=%s port=%d\n",
122 inet_ntoa(self.sin_addr), ntohs(self.sin_port));
123 }
124 dprintf("> addr=%s opcode=%d\n",
125 inet_ntoa(conn->addr.sin_addr), opcode);
126 dprintf("> vnet=%s vmac=" MACFMT " addr=%s\n",
127 VnetId_ntoa(vnet, vnetbuf),
128 MAC6TUPLE(vmac->mac),
129 VarpAddr_ntoa(addr, addrbuf));
130 err = marshal_bytes(conn->out, &varph, varp_n);
131 marshal_flush(conn->out);
132 dprintf("< err=%d\n", err);
133 return err;
134 }
136 /* Test some flags.
137 *
138 * @param z varp entry
139 * @param flags to test
140 * @return nonzero if flags set
141 */
142 int VCEntry_get_flags(VCEntry *z, int flags){
143 return z->flags & flags;
144 }
146 /** Set some flags.
147 *
148 * @param z varp entry
149 * @param flags to set
150 * @param set set flags on if nonzero, off if zero
151 * @return new flags value
152 */
153 int VCEntry_set_flags(VCEntry *z, int flags, int set){
154 if(set){
155 z->flags |= flags;
156 } else {
157 z->flags &= ~flags;
158 }
159 return z->flags;
160 }
162 /** Print a varp entry.
163 *
164 * @param ventry varp entry
165 */
166 void VCEntry_print(VCEntry *ventry){
167 if(ventry){
168 char *state, *flags;
169 char vnetbuf[VNET_ID_BUF];
170 char addrbuf[VARP_ADDR_BUF];
172 switch(ventry->state){
173 case VCACHE_STATE_INCOMPLETE: state = "INC"; break;
174 case VCACHE_STATE_REACHABLE: state = "RCH"; break;
175 case VCACHE_STATE_FAILED: state = "FLD"; break;
176 default: state = "UNK"; break;
177 }
178 flags = (VCEntry_get_flags(ventry, VCACHE_FLAG_PROBING) ? "P" : " ");
180 printf("VENTRY(%p %s %s vnet=%s vmac=" MACFMT " addr=%s time=%g)\n",
181 ventry,
182 state, flags,
183 VnetId_ntoa(&ventry->key.vnet, vnetbuf),
184 MAC6TUPLE(ventry->key.vmac.mac),
185 VarpAddr_ntoa(&ventry->addr, addrbuf),
186 ventry->timestamp);
187 } else {
188 printf("VENTRY: Null!\n");
189 }
190 }
192 int VCEntry_schedule(VCEntry *ventry);
193 void VCEntry_solicit(VCEntry *ventry);
195 /** Function called when a varp entry timer goes off.
196 * If the entry is still incomplete, carries on probing.
197 * Otherwise stops probing.
198 *
199 * @param arg ventry
200 */
201 static void ventry_timer_fn(Timer *timer){
202 VCEntry *ventry = timer->data;
203 int probing = 0, scheduled = 0;
205 //dprintf(">\n"); VCEntry_print(ventry);
206 if(ventry->state == VCACHE_STATE_REACHABLE){
207 // Do nothing.
208 } else {
209 // Probe if haven't run out of tries, otherwise fail.
210 if(ventry->probes < VCACHE_PROBE_MAX){
211 //probing = 1;
212 ventry->probes++;
213 scheduled = VCEntry_schedule(ventry);
214 //VCEntry_solicit(ventry);
215 probing = scheduled;
216 } else {
217 ventry->state = VCACHE_STATE_FAILED;
218 IPMessageQueue_clear(&ventry->queue);
219 }
220 }
221 if(!probing){
222 VCEntry_set_flags(ventry,
223 (VCACHE_FLAG_PROBING
224 | VCACHE_FLAG_REMOTE_PROBE
225 | VCACHE_FLAG_LOCAL_PROBE),
226 0);
227 }
228 VCEntry_set_flags(ventry, VCACHE_FLAG_PROBING, probing);
229 //dprintf("<\n");
230 }
232 /** Schedule the varp entry timer.
233 *
234 * @param ventry varp entry
235 */
236 int VCEntry_schedule(VCEntry *ventry){
237 int scheduled = 0;
238 if(ventry->probes == 1){
239 scheduled = 1;
240 Timer_set(VCACHE_LOCAL_DELAY, ventry_timer_fn, ventry);
241 } else {
242 VCEntry_solicit(ventry);
243 }
244 return scheduled;
245 }
247 /** Create a varp entry. Initializes the internal state.
248 *
249 * @param vnet vnet id
250 * @param vmac virtual MAC address (copied)
251 * @return ventry or null
252 */
253 VCEntry * VCEntry_new(VnetId *vnet, Vmac *vmac){
254 VCEntry *z = ALLOCATE(VCEntry);
255 z->state = VCACHE_STATE_INCOMPLETE;
256 z->timestamp = time_now();
257 z->key.vnet = *vnet;
258 z->key.vmac = *vmac;
259 return z;
260 }
262 /** Hash function for keys in the varp cache.
263 * Hashes the vnet id and mac.
264 *
265 * @param k key (VCKey)
266 * @return hashcode
267 */
268 Hashcode vcache_key_hash_fn(void *k){
269 VCKey *key = k;
270 Hashcode h = 0;
271 h = VnetId_hash(h, &key->vnet);
272 h = Vmac_hash(h, &key->vmac);
273 return h;
274 }
276 /** Test equality for keys in the varp cache.
277 * Compares vnet and mac.
278 *
279 * @param k1 key to compare (VCKey)
280 * @param k2 key to compare (VCKey)
281 * @return 1 if equal, 0 otherwise
282 */
283 int vcache_key_equal_fn(void *k1, void *k2){
284 VCKey *key1 = k1;
285 VCKey *key2 = k2;
286 return (VnetId_eq(&key1->vnet , &key2->vnet) &&
287 Vmac_eq(&key1->vmac, &key2->vmac));
288 }
290 void VarpCache_schedule(VarpCache *z);
292 /** Function called when the varp table timer goes off.
293 * Sweeps old varp cache entries and reschedules itself.
294 *
295 * @param arg varp table
296 */
297 static void vcache_timer_fn(Timer *timer){
298 VarpCache *z = timer->data;
299 //dprintf("> z=%p\n", z);
300 if(z){
301 VarpCache_sweep(z, 0);
302 VarpCache_schedule(z);
303 }
304 //dprintf("<\n");
305 }
307 /** Schedule the varp table timer.
308 *
309 * @param z varp table
310 */
311 void VarpCache_schedule(VarpCache *z){
312 Timer_set(VCACHE_ENTRY_TTL, vcache_timer_fn, z);
313 }
315 /** Print a varp table.
316 *
317 * @param z table
318 */
319 void VarpCache_print(VarpCache *z){
320 HashTable_for_decl(entry);
321 VCEntry *ventry;
323 dprintf(">\n");
324 HashTable_for_each(entry, vcache->table){
325 ventry = entry->value;
326 VCEntry_print(ventry);
327 }
328 dprintf("<\n");
329 }
331 /** Print the varp cache.
332 */
333 void vcache_print(void){
334 VarpCache_print(vcache);
335 }
337 /** Create a varp table.
338 *
339 * @return new table or null
340 */
341 VarpCache * VarpCache_new(void){
342 VarpCache *z = NULL;
344 z = ALLOCATE(VarpCache);
345 z->table = HashTable_new(VCACHE_BUCKETS);
346 z->table->key_equal_fn = vcache_key_equal_fn;
347 z->table->key_hash_fn = vcache_key_hash_fn;
348 VarpCache_schedule(z);
349 return z;
350 }
352 /** Add a new entry to the varp table.
353 *
354 * @param z table
355 * @param vnet vnet id
356 * @param vmac virtual MAC address (copied)
357 * @return new entry or null
358 */
359 VCEntry * VarpCache_add(VarpCache *z, VnetId *vnet, Vmac *vmac){
360 VCEntry *ventry;
361 HTEntry *entry;
363 ventry = VCEntry_new(vnet, vmac);
364 //dprintf("> "); VCEntry_print(ventry);
365 entry = HashTable_add(z->table, ventry, ventry);
366 return ventry;
367 }
369 /** Remove an entry from the varp table.
370 *
371 * @param z table
372 * @param ventry entry to remove
373 * @return removed count
374 */
375 int VarpCache_remove(VarpCache *z, VCEntry *ventry){
376 return HashTable_remove(z->table, ventry);
377 }
379 /** Lookup an entry in the varp table.
380 *
381 * @param z table
382 * @param vnet vnet id
383 * @param vmac virtual MAC addres
384 * @return entry found or null
385 */
386 VCEntry * VarpCache_lookup(VarpCache *z, VnetId *vnet, Vmac *vmac){
387 VCKey key = { .vnet = *vnet, .vmac = *vmac };
388 VCEntry *ventry;
389 ventry = HashTable_get(z->table, &key);
390 return ventry;
391 }
393 void VCEntry_solicit(VCEntry *ventry){
394 dprintf(">\n");
395 if(VCEntry_get_flags(ventry, VCACHE_FLAG_LOCAL_PROBE)){
396 dprintf("> local probe\n");
397 varp_send(vnetd->bcast_conn, VARP_OP_REQUEST,
398 &ventry->key.vnet, &ventry->key.vmac, &ventry->addr);
399 }
400 if(VCEntry_get_flags(ventry, VCACHE_FLAG_REMOTE_PROBE)){
401 ConnList *l;
402 dprintf("> remote probe\n");
403 for(l = vnetd->connections; l; l = l->next){
404 varp_send(l->conn, VARP_OP_REQUEST,
405 &ventry->key.vnet, &ventry->key.vmac, &ventry->addr);
406 }
408 }
409 dprintf("<\n");
410 }
412 int VCEntry_resolve(VCEntry *ventry, IPMessage *msg, int flags){
413 int err = 0;
415 dprintf("> "); //VCEntry_print(ventry);
416 ventry->state = VCACHE_STATE_INCOMPLETE;
417 VCEntry_set_flags(ventry, flags, 1);
418 IPMessageQueue_add(&ventry->queue, msg);
419 if(!VCEntry_get_flags(ventry, VCACHE_FLAG_PROBING)){
420 VCEntry_set_flags(ventry, VCACHE_FLAG_PROBING, 1);
421 ventry->probes = 1;
422 VCEntry_schedule(ventry);
423 //VCEntry_solicit(ventry);
424 }
425 dprintf("< err=%d\n", err);
426 return err;
427 }
429 /** Update a ventry. Sets the address and state to those given
430 * and sets the timestamp to 'now'.
431 *
432 * @param ventry varp entry
433 * @param addr care-of address
434 * @param state state
435 * @return 0 on success, error code otherwise
436 */
437 int VCEntry_update(VCEntry *ventry, IPMessage *msg, VarpHdr *varph, int state){
438 int err = 0;
439 double now = time_now();
441 if(VCEntry_get_flags(ventry, VCACHE_FLAG_PERMANENT)) goto exit;
442 ventry->addr = varph->addr;
443 ventry->timestamp = now;
444 ventry->state = state;
445 if(ventry->state == VCACHE_STATE_REACHABLE){
446 // Process the output queue.
447 IPMessage *msg;
448 while((msg = IPMessageQueue_pop(&ventry->queue))){
449 dprintf("> announce\n");
450 varp_send(msg->conn, VARP_OP_ANNOUNCE,
451 &ventry->key.vnet, &ventry->key.vmac, &ventry->addr);
452 }
453 }
454 exit:
455 return err;
456 }
458 /** Update the ventry corresponding to the given varp header.
459 *
460 * @param z table
461 * @param varph varp header
462 * @param state state
463 * @return 0 on success, -ENOENT if no entry found
464 */
465 int VarpCache_update(VarpCache *z, IPMessage *msg, VarpHdr *varph, int state){
466 int err = 0;
467 VCEntry *ventry;
469 dprintf(">\n");
470 ventry = VarpCache_lookup(z, &varph->vnet, &varph->vmac);
471 if(ventry){
472 err = VCEntry_update(ventry, msg, varph, state);
473 } else {
474 err = -ENOENT;
475 }
476 dprintf("< err=%d\n", err);
477 return err;
478 }
481 /** Put old varp entries into the incomplete state.
482 * Permanent entries are not changed.
483 * If 'all' is non-zero, all non-permanent entries
484 * are put into the incomplete state, regardless of age.
485 *
486 * @param z table
487 * @param all reset all entries if non-zero
488 */
489 void VarpCache_sweep(VarpCache *z, int all){
490 HashTable_for_decl(entry);
491 VCEntry *ventry;
492 double now = time_now();
493 double old = now - VCACHE_ENTRY_TTL;
495 dprintf(">\n");
496 HashTable_for_each(entry, vcache->table){
497 ventry = entry->value;
498 if(!VCEntry_get_flags(ventry, VCACHE_FLAG_PERMANENT) &&
499 (all || (ventry->timestamp < old))){
500 ventry->state = VCACHE_STATE_INCOMPLETE;
501 }
502 }
503 dprintf("<\n");
504 }
506 /** Forward a varp message.
507 * If local forwards it to remote vnetds.
508 * If not local forwards it to local net.
509 *
510 * @param varph varp message to forward
511 * @param local whether it's local or not
512 */
513 void vcache_forward_varp(VarpHdr *varph, int local){
514 uint16_t opcode = ntohs(varph->hdr.opcode);
515 if(local){
516 ConnList *l;
517 for(l = vnetd->connections; l; l = l->next){
518 varp_send(l->conn, opcode, &varph->vnet, &varph->vmac, &varph->addr);
519 }
520 } else {
521 varp_send(vnetd->bcast_conn, opcode, &varph->vnet, &varph->vmac, &varph->addr);
522 }
523 }
525 /** Handle a varp request.
526 *
527 * @param msg incoming message
528 * @param varph varp message
529 * @return 0 if ok, -ENOENT if no matching vif, or error code
530 */
531 #if 1
532 int vcache_handle_request(IPMessage *msg, VarpHdr *varph, int local){
533 dprintf("> local=%d\n", local);
534 vcache_forward_varp(varph, local);
535 dprintf("<\n");
536 return 0;
537 }
539 #else
540 int vcache_handle_request(IPMessage *msg, VarpHdr *varph, int local){
541 int err = -ENOENT;
542 VnetId *vnet;
543 Vmac *vmac;
544 VCEntry *ventry = NULL;
545 int reply = 0;
547 dprintf(">\n");
548 vnet = &varph->vnet;
549 vmac = &varph->vmac;
550 ventry = VarpCache_lookup(vcache, vnet, vmac);
551 if(!ventry){
552 ventry = VarpCache_add(vcache, vnet, vmac);
553 }
554 if(local){
555 // Request coming from the local subnet (on our udp port).
556 if(ventry->state == VCACHE_STATE_REACHABLE){
557 if(local){
558 // Have an entry, and it's non-local - reply (locally).
559 // Potential out-of-date cache problem.
560 // Should query remotely instead of replying.
561 varp_send(conn, VARP_OP_ANNOUNCE, ventry);
562 }
563 } else {
564 // Incomplete entry. Resolve.
565 VCEntry_resolve(ventry, msg, VCACHE_FLAG_REMOTE_PROBE);
566 }
567 } else {
568 // Non-local request (on one of our tcp connetions).
569 if(ventry->state == VCACHE_STATE_REACHABLE){
570 if(local){
571 // Have an entry and it's local - reply (remotely).
572 // Potential out-of-date cache problem.
573 // Should query locally instead of replying.
574 varp_send(msg->conn, VARP_OP_ANNOUNCE, ventry);
575 } else {
576 // Have a non-local entry - do nothing and assume someone else
577 // will reply.
578 }
579 } else {
580 // Incomplete entry. Resolve.
581 VCEntry_resolve(ventry, msg, VCACHE_FLAG_LOCAL_PROBE);
582 }
583 }
584 exit:
585 dprintf("< err=%d\n", err);
586 return err;
587 }
588 #endif
590 /** Handle a varp announce message.
591 * Update the matching ventry if we have one.
592 *
593 * @param msg incoming message
594 * @param varp message
595 * @return 0 if OK, -ENOENT if no matching entry
596 */
597 int vcache_handle_announce(IPMessage *msg, VarpHdr *varph, int local){
598 int err = 0;
600 vcache_forward_varp(varph, local);
601 err = VarpCache_update(vcache, msg, varph, VCACHE_STATE_REACHABLE);
602 return err;
603 }
605 /** Handle an incoming varp message.
606 *
607 * @param msg incoming message
608 * @return 0 if OK, error code otherwise
609 */
610 int vcache_handle_message(IPMessage *msg, int local){
611 int err = -EINVAL;
612 VnetMsg *vmsg = msg->data;
613 VarpHdr *varph = &vmsg->varp.varph;
615 dprintf(">\n");
616 #ifdef DEBUG
617 {
618 char vnetbuf[VNET_ID_BUF];
619 dprintf("> src=%s:%d\n", inet_ntoa(msg->saddr.sin_addr), ntohs(msg->saddr.sin_port));
620 dprintf("> dst=%s:%d\n", inet_ntoa(msg->daddr.sin_addr), ntohs(msg->daddr.sin_port));
621 dprintf("> opcode=%d vnet=%s vmac=" MACFMT "\n",
622 ntohs(varph->opcode),
623 VnetId_ntoa(&varph->vnet, vnetbuf),
624 MAC6TUPLE(varph->vmac.mac));
625 }
626 #endif
627 switch(ntohs(varph->hdr.opcode)){
628 case VARP_OP_REQUEST:
629 err = vcache_handle_request(msg, varph, local);
630 break;
631 case VARP_OP_ANNOUNCE:
632 err = vcache_handle_announce(msg, varph, local);
633 break;
634 default:
635 break;
636 }
637 dprintf("< err=%d\n", err);
638 return err;
639 }
641 /** Initialize the varp cache.
642 *
643 * @return 0 on success, error code otherwise
644 */
645 int vcache_init(void){
646 int err = 0;
648 if(!vcache){
649 vcache = VarpCache_new();
650 }
651 return err;
652 }