ia64/xen-unstable

view tools/vnet/vnetd/vnetd.c @ 6946:e703abaf6e3d

Add behaviour to the remove methods to remove the transaction's path itself. This allows us to write Remove(path) to remove the specified path rather than having to slice the path ourselves.
author emellor@ewan
date Sun Sep 18 14:42:13 2005 +0100 (2005-09-18)
parents 3233e7ecfa9f
children 06d84bf87159
line source
1 /*
2 * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>.
3 *
4 * This library is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as
6 * published by the Free Software Foundation; either version 2.1 of the
7 * License, or (at your option) any later version. This library is
8 * distributed in the hope that it will be useful, but WITHOUT ANY
9 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE.
11 * See the GNU Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this library; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17 /** @file
18 *
19 * Vnetd tcp messages:
20 *
21 * - varp request: request care-of-addr for a vif.
22 * If know answer, reply. If not broadcast locally.
23 *
24 * - varp announce: reply to a varp request.
25 * If a (local) request is pending, remember and broadcast locally.
26 *
27 * - vnet subscribe: indicate there are local vifs in a vnet (use varp announce?).
28 *
29 * - vnet forward: tunneled broadcast packet to rebroadcast.
30 * Broadcast locally (if there are vifs in the vnet).
31 *
32 *
33 * Vnetd udp messages (varp):
34 *
35 * - local varp request:
36 * If know and vif is non-local, reply.
37 * If know and vif is local, do nothing (but announce will reset).
38 * If have entry saying is local and no-one answers - remove (? or rely on entry timeout).
39 * If don't know and there is no (quick) local reply, forward to peers.
40 *
41 * - remote varp request:
42 * If know, reply.
43 * If don't know, query locally (and queue request).
44 *
45 * - varp announce: remember and adjust vnet subscriptions.
46 * Forward to peers if a request is pending.
47 *
48 * Vnetd broadcast messages (tunneling):
49 *
50 * - etherip: forward to peers (on the right vnets)
51 *
52 * - esp: forward to peers (on the right vnets)
53 *
54 *
55 * For etherip can tell the vnet from the header (in clear).
56 * But for esp can't. So should use mcast to define? Or always some clear header?
57 *
58 * Make ssl on tcp connections optional.
59 *
60 * So far have been assuming esp for security.
61 * But could use vnetd to forward and use ssl on the connection.
62 * But has usual probs with efficiency.
63 * However, should 'just work' if the coa for the vif has been set
64 * to the vnetd. How? Vnetd configured to act as gateway for
65 * some peers? Then would rewrite varp announce to itself and forward
66 * traffic to peer.
67 *
68 * Simplify - make each vnetd have one peer?
69 * If need to link more subnets, add vnetds?
70 *
71 * Need requests table for each tcp conn (incoming).
72 * - entries we want to resolve (and fwd the answer).
73 *
74 * Need requests table for the udp socket.
75 * - entries we want to resolve (and return the answer).
76 *
77 * Need table of entries we know.
78 * - from caching local announce
79 * - from caching announce reply to forwarded request
80 *
81 * Problem with replying to requests from the cache - if the cache
82 * is out of date we reply with incorrect data. So if a VM migrates
83 * we will advertise the old location until it times out.
84 *
85 * So should probably not reply out of the cache at all - but always
86 * query for the answer. Could query direct to old location if
87 * entry is valid the first time, and broadcast if no reply in timeout.
88 * Causes delay if migrated - may as well broadcast.
89 *
90 * Need to watch out for query loops. If have 3 vnetds A,B,C and
91 * A gets a query, forwards to B and C. B forwards to C, which
92 * forwards to A, and on forever. So if have an entry that has been
93 * probed, do not forward it when get another query for it.
94 *
95 * @author Mike Wray <mike.wray@hpl.hp.com>
96 */
99 #include <stdlib.h>
100 #include <unistd.h>
101 #include <stdio.h>
102 #include <getopt.h>
103 #include <errno.h>
104 #include <sys/types.h>
105 #include <time.h>
106 #include <sys/socket.h>
107 #include <netinet/in.h>
108 #include <arpa/inet.h>
109 #include <string.h>
111 #include <signal.h>
112 #include <sys/wait.h>
113 #include <sys/select.h>
115 #include <linux/ip.h> // For struct iphdr;
117 #include <linux/if_ether.h>
118 #include "if_etherip.h"
119 #include "if_varp.h"
121 #include "allocate.h"
123 #include "vnetd.h"
124 #include "file_stream.h"
125 #include "string_stream.h"
126 #include "socket_stream.h"
127 #include "sys_net.h"
129 #include "enum.h"
130 #include "sxpr.h"
132 #include "marshal.h"
133 #include "connection.h"
134 #include "select.h"
135 #include "timer.h"
136 #include "vcache.h"
138 int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, Conn **val);
/* Boolean constants, supplied only when no earlier header defined them. */
#if !defined(TRUE)
#define TRUE 1
#endif

#if !defined(FALSE)
#define FALSE 0
#endif
/** Socket flags.
 * Independent bits that are OR-ed together and tested with '&'
 * by create_socket() and vnetd_raw_socket().
 */
enum {
    VSOCK_REUSE     = 1 << 0,  /* set SO_REUSEADDR */
    VSOCK_BIND      = 1 << 1,  /* bind() to the given address */
    VSOCK_CONNECT   = 1 << 2,  /* connect() to the given address */
    VSOCK_BROADCAST = 1 << 3,  /* set SO_BROADCAST */
    VSOCK_MULTICAST = 1 << 4,  /* join the multicast group */
};
157 #define PROGRAM "vnetd"
158 #define VERSION "0.1"
160 #define MODULE_NAME PROGRAM
161 #define DEBUG
162 #undef DEBUG
163 #include "debug.h"
165 #define OPT_PORT 'p'
166 #define KEY_PORT "port"
167 #define DOC_PORT "<port>\n\t" PROGRAM " UDP port (as a number or service name)"
169 #define OPT_ADDR 'm'
170 #define KEY_ADDR "mcaddr"
171 #define DOC_ADDR "<address>\n\t" PROGRAM " multicast address"
173 #define OPT_PEER 'r'
174 #define KEY_PEER "peer"
175 #define DOC_PEER "<peer>\n\t Peer " PROGRAM " to connect to (IP address or hostname)"
177 #define OPT_FILE 'f'
178 #define KEY_FILE "file"
179 #define DOC_FILE "<file>\n\t Configuration file to load"
181 #define OPT_CTRL 'c'
182 #define KEY_CTRL "control"
183 #define DOC_CTRL "<port>\n\t " PROGRAM " control port (as a number or service name)"
185 #define OPT_HELP 'h'
186 #define KEY_HELP "help"
187 #define DOC_HELP "\n\tprint help"
189 #define OPT_VERSION 'v'
190 #define KEY_VERSION "version"
191 #define DOC_VERSION "\n\tprint version"
193 #define OPT_VERBOSE 'V'
194 #define KEY_VERBOSE "verbose"
195 #define DOC_VERBOSE "\n\tverbose flag"
197 /** Print a usage message.
198 * Prints to stdout if err is zero, and exits with 0.
199 * Prints to stderr if err is non-zero, and exits with 1.
200 *
201 * @param err error code
202 */
203 static void usage(int err){
204 FILE *out = (err ? stderr : stdout);
206 fprintf(out, "Usage: %s [options]\n", PROGRAM);
207 fprintf(out, "-%c, --%s %s\n", OPT_ADDR, KEY_ADDR, DOC_ADDR);
208 fprintf(out, "-%c, --%s %s\n", OPT_PORT, KEY_PORT, DOC_PORT);
209 fprintf(out, "-%c, --%s %s\n", OPT_PEER, KEY_PEER, DOC_PEER);
210 fprintf(out, "-%c, --%s %s\n", OPT_VERBOSE, KEY_VERBOSE, DOC_VERBOSE);
211 fprintf(out, "-%c, --%s %s\n", OPT_VERSION, KEY_VERSION, DOC_VERSION);
212 fprintf(out, "-%c, --%s %s\n", OPT_HELP, KEY_HELP, DOC_HELP);
213 exit(err ? 1 : 0);
214 }
216 /** Short options. Options followed by ':' take an argument. */
217 static char *short_opts = (char[]){
218 OPT_ADDR, ':',
219 OPT_PORT, ':',
220 OPT_PEER, ':',
221 OPT_HELP,
222 OPT_VERSION,
223 OPT_VERBOSE,
224 0 };
226 /** Long options. */
227 static struct option const long_opts[] = {
228 { KEY_ADDR, required_argument, NULL, OPT_ADDR },
229 { KEY_PORT, required_argument, NULL, OPT_PORT },
230 { KEY_PEER, required_argument, NULL, OPT_PEER },
231 { KEY_HELP, no_argument, NULL, OPT_HELP },
232 { KEY_VERSION, no_argument, NULL, OPT_VERSION },
233 { KEY_VERBOSE, no_argument, NULL, OPT_VERBOSE },
234 { NULL, 0, NULL, 0 }
235 };
/** Get the address of this host, so that broadcast traffic
 * we sent ourselves can be recognised and ignored.
 * Only addr->sin_addr is written.
 *
 * @param addr where to store the address
 * @return 0 on success, error code otherwise
 */
int get_self_addr(struct sockaddr_in *addr){
    char name[1024] = {};
    unsigned long ip;
    int rc;

    /* One byte is held back so the zero-filled buffer stays terminated. */
    rc = gethostname(name, sizeof(name) - 1);
    if(rc){
        return rc;
    }
    rc = get_host_address(name, &ip);
    /* NOTE(review): a zero result is treated as failure here, whereas main()
     * treats a non-zero result from get_host_address() as failure - confirm
     * which convention the helper really follows.
     */
    if(rc == 0){
        return -ENOENT;
    }
    addr->sin_addr.s_addr = ip;
    return 0;
}
260 /** Marshal a message.
261 *
262 * @param io destination
263 * @param msg message
264 * @return number of bytes written, or negative error code
265 */
266 int VnetMsg_marshal(IOStream *io, VnetMsg *msg){
267 int err = 0;
268 int hdr_n = sizeof(VnetMsgHdr);
270 err = marshal_uint16(io, msg->hdr.id);
271 if(err < 0) goto exit;
272 err = marshal_uint16(io, msg->hdr.opcode);
273 if(err < 0) goto exit;
274 switch(msg->hdr.id){
275 case VNET_VARP_ID:
276 err = marshal_bytes(io, ((char*)msg) + hdr_n, sizeof(VarpHdr) - hdr_n);
277 break;
278 case VNET_FWD_ID:
279 err = marshal_uint16(io, msg->fwd.protocol);
280 if(err < 0) goto exit;
281 err = marshal_uint16(io, msg->fwd.len);
282 if(err < 0) goto exit;
283 err = marshal_bytes(io, msg->fwd.data, msg->fwd.len);
284 break;
285 default:
286 err = -EINVAL;
287 break;
288 }
289 exit:
290 return err;
291 }
293 /** Unmarshal a message.
294 *
295 * @param io source
296 * @param msg message to unmarshal into
297 * @return number of bytes read, or negative error code
298 */
299 int VnetMsg_unmarshal(IOStream *io, VnetMsg *msg){
300 int err = 0;
301 int hdr_n = sizeof(VnetMsgHdr);
303 dprintf("> id\n");
304 err = unmarshal_uint16(io, &msg->hdr.id);
305 if(err < 0) goto exit;
306 dprintf("> opcode\n");
307 err = unmarshal_uint16(io, &msg->hdr.opcode);
308 if(err < 0) goto exit;
309 switch(msg->hdr.id){
310 case VNET_VARP_ID:
311 msg->hdr.opcode = htons(msg->hdr.opcode);
312 dprintf("> varp hdr_n=%d varphdr=%d\n", hdr_n, sizeof(VarpHdr));
313 err = unmarshal_bytes(io, ((char*)msg) + hdr_n, sizeof(VarpHdr) - hdr_n);
314 break;
315 case VNET_FWD_ID:
316 dprintf("> forward\n");
317 err = unmarshal_uint16(io, &msg->fwd.protocol);
318 if(err < 0) goto exit;
319 dprintf("> forward len\n");
320 err = unmarshal_uint16(io, &msg->fwd.len);
321 if(err < 0) goto exit;
322 dprintf("> forward bytes\n");
323 err = unmarshal_bytes(io, msg->fwd.data, msg->fwd.len);
324 break;
325 default:
326 wprintf("> Invalid id %d\n", msg->hdr.id);
327 err = -EINVAL;
328 break;
329 }
330 exit:
331 dprintf("< err=%d \n", err);
332 return err;
333 }
335 Vnetd _vnetd = {};
336 Vnetd *vnetd = &_vnetd;
/** Counter for timer alarms.
 * Incremented by the SIGALRM handler and read/reset by the select loop,
 * so it must be an object that can be accessed safely from a signal handler.
 */
static volatile sig_atomic_t timer_alarms = 0;
342 /** Set vnetd defaults.
343 *
344 * @param vnetd vnetd
345 */
346 void vnetd_set_defaults(Vnetd *vnetd){
347 *vnetd = (Vnetd){};
348 vnetd->port = htons(VNETD_PORT);
349 vnetd->peer_port = vnetd->port; //htons(VNETD_PEER_PORT);
350 vnetd->verbose = FALSE;
351 vnetd->peers = ONULL;
352 vnetd->mcast_addr.sin_addr.s_addr = VARP_MCAST_ADDR;
353 vnetd->mcast_addr.sin_port = vnetd->port;
354 }
356 uint32_t vnetd_mcast_addr(Vnetd *vnetd){
357 return vnetd->mcast_addr.sin_addr.s_addr;
358 }
360 uint16_t vnetd_mcast_port(Vnetd *vnetd){
361 return vnetd->mcast_addr.sin_port;
362 }
364 /** Add a connection to a peer.
365 *
366 * @param vnetd vnetd
367 * @param conn connection
368 */
369 void connections_add(Vnetd *vnetd, Conn *conn){
370 vnetd->connections = ConnList_add(conn, vnetd->connections);
371 }
373 /** Delete a connection to a peer.
374 *
375 * @param vnetd vnetd
376 * @param conn connection
377 */
378 void connections_del(Vnetd *vnetd, Conn *conn){
379 ConnList *prev, *curr, *next;
380 for(prev = NULL, curr = vnetd->connections; curr; prev = curr, curr = next){
381 next = curr->next;
382 if(curr->conn == conn){
383 if(prev){
384 prev->next = curr->next;
385 } else {
386 vnetd->connections = curr->next;
387 }
388 }
389 }
390 }
392 /** Close all connections to peers.
393 *
394 * @param vnetd vnetd
395 */
396 void connections_close_all(Vnetd *vnetd){
397 ConnList *l;
398 for(l = vnetd->connections; l; l = l->next){
399 Conn_close(l->conn);
400 }
401 vnetd->connections = NULL;
402 }
404 /** Add peer connections to a select set.
405 *
406 * @param vnetd vnetd
407 * @param set select set
408 */
409 void connections_select(Vnetd *vnetd, SelectSet *set){
410 ConnList *l;
411 for(l = vnetd->connections; l; l = l->next){
412 SelectSet_add_read(set, l->conn->sock);
413 }
414 }
416 /** Handle peer connections according to a select set.
417 *
418 * @param vnetd vnetd
419 * @param set indicates ready connections
420 */
421 void connections_handle(Vnetd *vnetd, SelectSet *set){
422 ConnList *prev, *curr, *next;
423 Conn *conn;
424 for(prev = NULL, curr = vnetd->connections; curr; prev = curr, curr = next){
425 next = curr->next;
426 conn = curr->conn;
427 if(FD_ISSET(conn->sock, &set->rd)){
428 int conn_err;
429 conn_err = Conn_handle(conn);
430 if(conn_err){
431 if(prev){
432 prev->next = curr->next;
433 } else {
434 vnetd->connections = curr->next;
435 }
436 }
437 }
438 }
439 }
441 /** Forward a message from a peer onto the local subnet.
442 *
443 * @param vnetd vnetd
444 * @param vmsg message
445 * @return 0 on success, error code otherwise
446 */
447 int vnetd_forward_local(Vnetd *vnetd, VnetMsg *vmsg){
448 int err = 0;
449 int sock = 0;
450 struct sockaddr_in addr_in;
451 struct sockaddr *addr = (struct sockaddr *)&addr_in;
452 socklen_t addr_n = sizeof(addr_in);
454 dprintf(">\n");
455 switch(vmsg->fwd.protocol){
456 case IPPROTO_ESP:
457 dprintf("> ESP\n");
458 sock = vnetd->esp_sock; break;
459 case IPPROTO_ETHERIP:
460 dprintf("> Etherip\n");
461 sock = vnetd->etherip_sock; break;
462 default:
463 err = -EINVAL;
464 goto exit;
465 }
466 addr_in.sin_family = AF_INET;
467 addr_in.sin_addr = vnetd->mcast_addr.sin_addr;
468 addr_in.sin_port = htons(vmsg->fwd.protocol);
469 dprintf("> send dst=%s protocol=%d len=%d\n",
470 inet_ntoa(addr_in.sin_addr), vmsg->fwd.protocol, vmsg->fwd.len);
471 err = sendto(sock, vmsg->fwd.data, vmsg->fwd.len, 0, addr, addr_n);
472 exit:
473 dprintf("< err=%d\n", err);
474 return err;
475 }
477 /** Forward a message to a peer.
478 *
479 * @param conn peer connection
480 * @param protocol message protocol
481 * @param data message data
482 * @param data_n message size
483 * @return 0 on success, error code otherwise
484 */
485 int vnetd_forward_peer(Conn *conn, int protocol, void *data, int data_n){
486 int err = 0;
487 IOStream _io, *io = &_io;
488 StringData sdata;
489 char buf[1600];
491 dprintf("> addr=%s protocol=%d n=%d\n",
492 inet_ntoa(conn->addr.sin_addr), protocol, data_n);
493 string_stream_init(io, &sdata, buf, sizeof(buf));
494 err = marshal_uint16(io, VNET_FWD_ID);
495 if(err < 0) goto exit;
496 err = marshal_uint16(io, 0);
497 if(err < 0) goto exit;
498 err = marshal_uint16(io, protocol);
499 if(err < 0) goto exit;
500 err = marshal_uint16(io, data_n);
501 if(err < 0) goto exit;
502 err = marshal_bytes(io, data, data_n);
503 if(err < 0) goto exit;
504 err = IOStream_write(conn->out, buf, IOStream_get_written(io));
505 IOStream_flush(conn->out);
506 exit:
507 if(err < 0) perror(__FUNCTION__);
508 dprintf("< err=%d\n", err);
509 return err;
510 }
512 /** Forward a message to all peers.
513 *
514 * @param vnetd vnetd
515 * @param protocol message protocol
516 * @param data message data
517 * @param data_n message size
518 * @return 0 on success, error code otherwise
519 */
520 int vnetd_forward_peers(Vnetd *vnetd, int protocol, void *data, int data_n){
521 int err = 0;
522 ConnList *curr, *next;
524 dprintf(">\n");
525 for(curr = vnetd->connections; curr; curr = next){
526 next = curr->next;
527 vnetd_forward_peer(curr->conn, protocol, data, data_n);
528 }
529 dprintf("< err=%d\n", err);
530 return err;
531 }
533 /** Handler for a peer connection.
534 * Reads a VnetMsg from the connection and handles it.
535 *
536 * @param conn peer connection
537 * @return 0 on success, error code otherwise
538 */
539 int conn_handle_fn(Conn *conn){
540 int err = 0;
541 VnetMsg *vmsg = ALLOCATE(VnetMsg);
542 IPMessage *msg = NULL;
544 dprintf("> addr=%s port=%u\n",
545 inet_ntoa(conn->addr.sin_addr),
546 ntohs(conn->addr.sin_port));
547 err = VnetMsg_unmarshal(conn->in, vmsg);
548 if(err < 0){
549 wprintf("> Unmarshal error %d\n", err);
550 goto exit;
551 }
552 switch(vmsg->hdr.id){
553 case VNET_VARP_ID:
554 dprintf("> Got varp message\n");
555 msg = ALLOCATE(IPMessage);
556 msg->conn = conn;
557 msg->saddr = conn->addr;
558 msg->data = vmsg;
559 err = vcache_handle_message(msg, 0);
560 err = 0;
561 break;
562 case VNET_FWD_ID:
563 dprintf("> Got forward message\n");
564 err = vnetd_forward_local(vnetd, vmsg);
565 err = 0;
566 break;
567 default:
568 wprintf("> Invalid id=%d\n", vmsg->hdr.id);
569 err = -EINVAL;
570 break;
571 }
572 exit:
573 dprintf("< err=%d\n", err);
574 return err;
575 }
577 /** Accept an incoming tcp connection from a peer vnetd.
578 *
579 * @param sock tcp socket
580 * @return 0 on success, error code otherwise
581 */
582 int vnetd_accept(Vnetd *vnetd, Conn *conn){
583 Conn *new_conn = NULL;
584 struct sockaddr_in peer_in;
585 struct sockaddr *peer = (struct sockaddr *)&peer_in;
586 socklen_t peer_n = sizeof(peer_in);
587 int peersock;
588 int err = 0;
590 //dprintf(">\n");
591 new_conn = Conn_new(conn_handle_fn, vnetd);
592 //dprintf("> accept...\n");
593 peersock = accept(conn->sock, peer, &peer_n);
594 //dprintf("> accept=%d\n", peersock);
595 if(peersock < 0){
596 perror("accept");
597 err = -errno;
598 goto exit;
599 }
600 iprintf("> Accepted connection from %s:%d\n",
601 inet_ntoa(peer_in.sin_addr), htons(peer_in.sin_port));
602 err = Conn_init(new_conn, peersock, SOCK_STREAM, peer_in);
603 if(err) goto exit;
604 connections_add(vnetd, new_conn);
605 exit:
606 if(err){
607 Conn_close(new_conn);
608 }
609 if(err < 0) wprintf("< err=%d\n", err);
610 return err;
611 }
613 /** Connect to a peer vnetd.
614 *
615 * @param vnetd vnetd
616 * @param addr address
617 * @param port port
618 * @return 0 on success, error code otherwise
619 */
620 int vnetd_connect(Vnetd *vnetd, struct in_addr addr, uint16_t port){
621 Conn *conn = NULL;
622 int err = 0;
624 //dprintf(">\n");
625 conn = Conn_new(conn_handle_fn, vnetd);
626 err = Conn_connect(conn, SOCK_STREAM, addr, port);
627 if(err) goto exit;
628 connections_add(vnetd, conn);
629 exit:
630 if(err){
631 Conn_close(conn);
632 }
633 //dprintf(" < err=%d\n", err);
634 return err;
635 }
637 /** Handle a message on the udp socket.
638 * Expecting to see VARP messages only.
639 *
640 * @param sock udp socket
641 * @return 0 on success, error code otherwise
642 */
643 int vnetd_handle_udp(Vnetd *vnetd, Conn *conn){
644 int err = 0, rcv = 0;
645 struct sockaddr_in self_in;
646 struct sockaddr_in peer_in;
647 struct sockaddr *peer = (struct sockaddr *)&peer_in;
648 socklen_t peer_n = sizeof(peer_in);
649 VnetMsg *vmsg = NULL;
650 void *data;
651 int data_n;
652 int flags = 0;
653 IPMessage *msg = NULL;
655 //dprintf(">\n");
656 self_in = vnetd->addr;
657 vmsg = ALLOCATE(VnetMsg);
658 data = &vmsg->varp.varph;
659 data_n = sizeof(VarpHdr);
660 rcv = recvfrom(conn->sock, data, data_n, flags, peer, &peer_n);
661 if(rcv < 0){
662 err = rcv;
663 goto exit;
664 }
665 dprintf("> Received %d bytes from %s:%d\n",
666 rcv, inet_ntoa(peer_in.sin_addr), htons(peer_in.sin_port));
667 if(rcv != data_n){
668 err = -EINVAL;
669 goto exit;
670 }
671 if(peer_in.sin_addr.s_addr == self_in.sin_addr.s_addr){
672 //dprintf("> Ignoring message from self.\n");
673 goto exit;
674 }
675 msg = ALLOCATE(IPMessage);
676 msg->conn = conn;
677 msg->saddr = peer_in;
678 msg->data = vmsg;
680 err = vcache_handle_message(msg, 1);
681 exit:
682 //dprintf("< err=%d\n", err);
683 return err;
684 }
686 /** Handle a message on a raw socket.
687 * Only deals with etherip and esp.
688 * Forwards messages to peers.
689 *
690 * @param vnetd vnetd
691 * @param sock socket
692 * @param protocol protocol
693 * @return 0 on success, error code otherwise
694 */
695 int vnetd_handle_protocol(Vnetd *vnetd, int sock, int protocol){
696 int err = 0, rcv = 0;
697 struct sockaddr_in self_in;
698 struct sockaddr_in peer_in;
699 struct sockaddr *peer = (struct sockaddr *)&peer_in;
700 socklen_t peer_n = sizeof(peer_in);
701 uint8_t buf[VNET_FWD_MAX];
702 int buf_n = sizeof(buf);
703 char *data, *end;
704 int flags = 0;
705 struct iphdr *iph = NULL;
707 //dprintf(">\n");
708 self_in = vnetd->addr;
709 rcv = recvfrom(sock, buf, buf_n, flags, peer, &peer_n);
710 if(rcv < 0){
711 err = rcv;
712 goto exit;
713 }
714 dprintf("> Received %d bytes from %s protocol=%d\n",
715 rcv, inet_ntoa(peer_in.sin_addr), protocol);
716 if(rcv < sizeof(struct iphdr)){
717 wprintf("> Message too short for IP header\n");
718 err = -EINVAL;
719 goto exit;
720 }
721 if(peer_in.sin_addr.s_addr == self_in.sin_addr.s_addr){
722 dprintf("> Ignoring message from self.\n");
723 goto exit;
724 }
725 data = buf;
726 end = buf + rcv;
727 iph = (void*)data;
728 data += (iph->ihl << 2);
729 vnetd_forward_peers(vnetd, protocol, data, end - data);
730 exit:
731 //dprintf("< err=%d\n", err);
732 return err;
733 }
735 /** Socket select loop.
736 * Accepts connections on the tcp socket and handles
737 * messages on the other sockets.
738 *
739 * @return 0 on success, error code otherwise
740 */
741 int vnetd_select(Vnetd *vnetd){
742 int err = 0;
743 SelectSet set = {};
744 while(1){
745 SelectSet_zero(&set);
746 SelectSet_add_read(&set, vnetd->udp_conn->sock);
747 SelectSet_add_read(&set, vnetd->bcast_conn->sock);
748 SelectSet_add_read(&set, vnetd->etherip_sock);
749 SelectSet_add_read(&set, vnetd->esp_sock);
750 SelectSet_add_read(&set, vnetd->listen_conn->sock);
751 connections_select(vnetd, &set);
752 err = SelectSet_select(&set, NULL);
753 if(err == 0) continue;
754 if(err < 0){
755 if(errno == EINTR){
756 if(timer_alarms){
757 timer_alarms = 0;
758 process_timers();
759 }
760 continue;
761 }
762 perror("select");
763 goto exit;
764 }
765 if(FD_ISSET(vnetd->udp_conn->sock, &set.rd)){
766 vnetd_handle_udp(vnetd, vnetd->udp_conn);
767 }
768 if(FD_ISSET(vnetd->bcast_conn->sock, &set.rd)){
769 vnetd_handle_udp(vnetd, vnetd->bcast_conn);
770 }
771 if(FD_ISSET(vnetd->etherip_sock, &set.rd)){
772 vnetd_handle_protocol(vnetd, vnetd->etherip_sock, IPPROTO_ETHERIP);
773 }
774 if(FD_ISSET(vnetd->esp_sock, &set.rd)){
775 vnetd_handle_protocol(vnetd, vnetd->esp_sock, IPPROTO_ESP);
776 }
777 connections_handle(vnetd, &set);
778 if(FD_ISSET(vnetd->listen_conn->sock, &set.rd)){
779 vnetd_accept(vnetd, vnetd->listen_conn);
780 }
781 }
782 exit:
783 return err;
784 }
/** Set the SO_REUSEADDR option on a socket.
 *
 * @param sock socket
 * @param reuse option value
 * @return 0 on success, -errno otherwise (the failure is also reported with perror)
 */
int setsock_reuse(int sock, int reuse){
    if(setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0){
        int rc = -errno;
        perror("setsockopt SO_REUSEADDR");
        return rc;
    }
    return 0;
}
/** Set the SO_BROADCAST option on a socket.
 *
 * @param sock socket
 * @param bcast option value
 * @return 0 on success, -errno otherwise (the failure is also reported with perror)
 */
int setsock_broadcast(int sock, int bcast){
    if(setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &bcast, sizeof(bcast)) < 0){
        int rc = -errno;
        perror("setsockopt SO_BROADCAST");
        return rc;
    }
    return 0;
}
810 /** Join a socket to a multicast group.
811 */
812 int setsock_multicast(int sock, uint32_t saddr){
813 int err = 0;
814 struct ip_mreqn mreq = {};
815 int mloop = 0;
816 // See 'man 7 ip' for these options.
817 mreq.imr_multiaddr.s_addr = saddr; // IP multicast address.
818 mreq.imr_address = vnetd->addr.sin_addr; // Interface IP address.
819 mreq.imr_ifindex = 0; // Interface index (0 means any).
820 err = setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &mloop, sizeof(mloop));
821 if(err < 0){
822 err = -errno;
823 perror("setsockopt IP_MULTICAST_LOOP");
824 goto exit;
825 }
826 err = setsockopt(sock, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
827 if(err < 0){
828 err = -errno;
829 perror("setsockopt IP_ADD_MEMBERSHIP");
830 goto exit;
831 }
832 exit:
833 return err;
834 }
/** Set a socket's multicast ttl (default is 1).
 *
 * @param sock socket
 * @param ttl time-to-live for outgoing multicast packets
 * @return 0 on success, -errno otherwise (the failure is also reported with perror)
 */
int setsock_multicast_ttl(int sock, uint8_t ttl){
    if(setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)) < 0){
        int rc = -errno;
        perror("setsockopt IP_MULTICAST_TTL");
        return rc;
    }
    return 0;
}
849 char * socket_flags(int flags){
850 static char s[6];
851 int i = 0;
852 s[i++] = (flags & VSOCK_CONNECT ? 'c' : '-');
853 s[i++] = (flags & VSOCK_BIND ? 'b' : '-');
854 s[i++] = (flags & VSOCK_REUSE ? 'r' : '-');
855 s[i++] = (flags & VSOCK_BROADCAST ? 'B' : '-');
856 s[i++] = (flags & VSOCK_MULTICAST ? 'M' : '-');
857 s[i++] = '\0';
858 return s;
859 }
861 /** Create a socket.
862 * The flags can include VSOCK_REUSE, VSOCK_BROADCAST, VSOCK_CONNECT.
863 *
864 * @param socktype socket type
865 * @param saddr address
866 * @param port port
867 * @param flags flags
868 * @param val return value for the socket connection
869 * @return 0 on success, error code otherwise
870 */
871 int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, Conn **val){
872 int err = 0;
873 int sock = 0;
874 struct sockaddr_in addr_in;
875 struct sockaddr *addr = (struct sockaddr *)&addr_in;
876 socklen_t addr_n = sizeof(addr_in);
877 Conn *conn = NULL;
878 int reuse, bcast;
880 //dprintf(">\n");
881 reuse = (flags & VSOCK_REUSE);
882 bcast = (flags & VSOCK_BROADCAST);
883 addr_in.sin_family = AF_INET;
884 addr_in.sin_addr.s_addr = saddr;
885 addr_in.sin_port = port;
886 dprintf("> flags=%s addr=%s port=%d\n", socket_flags(flags),
887 inet_ntoa(addr_in.sin_addr), ntohs(addr_in.sin_port));
889 sock = socket(AF_INET, socktype, 0);
890 if(sock < 0){
891 err = -errno;
892 goto exit;
893 }
894 if(reuse){
895 err = setsock_reuse(sock, reuse);
896 if(err < 0) goto exit;
897 }
898 if(bcast){
899 err = setsock_broadcast(sock, bcast);
900 if(err < 0) goto exit;
901 }
902 if(flags & VSOCK_MULTICAST){
903 err = setsock_multicast(sock, saddr);
904 if(err < 0) goto exit;
905 }
906 if(flags & VSOCK_CONNECT){
907 err = connect(sock, addr, addr_n);
908 if(err < 0){
909 err = -errno;
910 perror("connect");
911 goto exit;
912 }
913 }
914 if(flags & VSOCK_BIND){
915 err = bind(sock, addr, addr_n);
916 if(err < 0){
917 err = -errno;
918 perror("bind");
919 goto exit;
920 }
921 }
922 conn = Conn_new(NULL, NULL);
923 Conn_init(conn, sock, socktype, addr_in);
924 {
925 struct sockaddr_in self = {};
926 socklen_t self_n;
927 getsockname(conn->sock, (struct sockaddr *)&self, &self_n);
928 dprintf("> sockname sock=%d addr=%s port=%d\n",
929 conn->sock, inet_ntoa(self.sin_addr), ntohs(self.sin_port));
930 }
931 exit:
932 *val = (err ? NULL : conn);
933 //dprintf("< err=%d\n", err);
934 return err;
935 }
937 /** Create the tcp listen socket.
938 *
939 * @param vnetd program arguments
940 * @param val return value for the socket
941 * @return 0 on success, error code otherwise
942 */
943 int vnetd_listen_conn(Vnetd *vnetd, Conn **val){
944 int err = 0;
945 int flags = VSOCK_BIND | VSOCK_REUSE;
946 //dprintf(">\n");
947 err = create_socket(SOCK_STREAM, INADDR_ANY, vnetd->peer_port, flags, val);
948 if(err) goto exit;
949 err = listen((*val)->sock, 5);
950 if(err < 0){
951 err = -errno;
952 perror("listen");
953 goto exit;
954 }
955 exit:
956 if(err && *val){
957 Conn_close(*val);
958 *val = NULL;
959 }
960 //dprintf("< err=%d\n", err);
961 return err;
962 }
964 /** Create the udp socket.
965 *
966 * @param vnetd program arguments
967 * @param val return value for the socket
968 * @return 0 on success, error code otherwise
969 */
970 int vnetd_udp_conn(Vnetd *vnetd, Conn **val){
971 int err = 0;
972 uint32_t addr = INADDR_ANY;
973 uint16_t port = vnetd->port;
974 int flags = (VSOCK_BIND | VSOCK_REUSE);
975 err = create_socket(SOCK_DGRAM, addr, port, flags, val);
976 return err;
977 }
979 /** Create the broadcast socket.
980 *
981 * @param vnetd program arguments
982 * @param val return value for the socket
983 * @return 0 on success, error code otherwise
984 */
985 int vnetd_broadcast_conn(Vnetd *vnetd, Conn **val){
986 int err = 0;
987 uint32_t addr = vnetd_mcast_addr(vnetd);
988 uint16_t port = vnetd_mcast_port(vnetd);
989 int flags = VSOCK_REUSE;
990 int multicast = IN_MULTICAST(ntohl(addr));
992 flags |= VSOCK_MULTICAST;
993 flags |= VSOCK_BROADCAST;
995 err = create_socket(SOCK_DGRAM, addr, port, flags, val);
996 if(err < 0) goto exit;
997 if(multicast){
998 err = setsock_multicast_ttl((*val)->sock, 1);
999 if(err < 0) goto exit;
1001 if(0){
1002 struct sockaddr * addr = (struct sockaddr *)&vnetd->addr;
1003 socklen_t addr_n = sizeof(vnetd->addr);
1004 dprintf("> sock=%d bind addr=%s:%d\n",
1005 (*val)->sock, inet_ntoa(vnetd->addr.sin_addr), ntohs(vnetd->addr.sin_port));
1006 err = bind((*val)->sock, addr, addr_n);
1007 if(err < 0){
1008 err = -errno;
1009 perror("bind");
1010 goto exit;
1013 if(0){
1014 struct sockaddr_in self = {};
1015 socklen_t self_n;
1016 getsockname((*val)->sock, (struct sockaddr *)&self, &self_n);
1017 dprintf("> sockname sock=%d addr=%s port=%d\n",
1018 (*val)->sock, inet_ntoa(self.sin_addr), ntohs(self.sin_port));
1020 exit:
1021 return err;
1024 /** Type for signal handling functions. */
1025 typedef void SignalAction(int code, siginfo_t *info, void *data);
1027 /** Handle SIGCHLD by getting child exit status.
1028 * This prevents child processes being defunct.
1030 * @param code signal code
1031 * @param info signal info
1032 * @param data
1033 */
1034 static void sigaction_SIGCHLD(int code, siginfo_t *info, void *data){
1035 int status;
1036 pid_t pid;
1037 pid = wait(&status);
1038 dprintf("> child pid=%d status=%d\n", pid, status);
1041 /** Handle SIGPIPE.
1043 * @param code signal code
1044 * @param info signal info
1045 * @param data
1046 */
1047 static void sigaction_SIGPIPE(int code, siginfo_t *info, void *data){
1048 dprintf("> SIGPIPE\n");
1051 /** Handle SIGALRM.
1053 * @param code signal code
1054 * @param info signal info
1055 * @param data
1056 */
1057 static void sigaction_SIGALRM(int code, siginfo_t *info, void *data){
1058 //dprintf("> SIGALRM\n");
1059 timer_alarms++;
1062 /** Install a handler for a signal.
1064 * @param signum signal
1065 * @param action handler
1066 * @return 0 on success, error code otherwise
1067 */
1068 static int catch_signal(int signum, SignalAction *action){
1069 int err = 0;
1070 struct sigaction sig = {};
1071 sig.sa_sigaction = action;
1072 sig.sa_flags = SA_SIGINFO;
1073 err = sigaction(signum, &sig, NULL);
1074 if(err){
1075 perror("sigaction");
1077 return err;
1080 /** Create a raw socket.
1082 * @param protocol protocol
1083 * @param flags flags
1084 * @param sock return value for the socket
1085 */
1086 int vnetd_raw_socket(int protocol, int flags, uint32_t mcaddr, int *sock){
1087 int err;
1088 int bcast = (flags & VSOCK_BROADCAST);
1089 //dprintf("> protocol=%d\n", protocol);
1090 err = *sock = socket(AF_INET, SOCK_RAW, protocol);
1091 if(err < 0){
1092 err = -errno;
1093 perror("socket");
1094 goto exit;
1096 if(bcast){
1097 err = setsock_broadcast(*sock, bcast);
1098 if(err < 0) goto exit;
1100 if(flags & VSOCK_MULTICAST){
1101 err = setsock_multicast(*sock, mcaddr);
1102 if(err < 0) goto exit;
1104 exit:
1105 //dprintf("< err=%d\n", err);
1106 return err;
1109 /** Connect to peer vnetds.
1111 * @param vnetd vnetd
1112 * @return 0 on success, error code otherwise
1113 */
1114 int vnetd_peers(Vnetd *vnetd){
1115 int err =0;
1116 Sxpr x, l;
1117 struct in_addr addr = {};
1118 for(l = vnetd->peers; CONSP(l); l = CDR(l)){
1119 x = CAR(l);
1120 addr.s_addr = OBJ_INT(x);
1121 vnetd_connect(vnetd, addr, vnetd->peer_port);
1123 return err;
1126 /** Vnet daemon main program.
1128 * @param vnetd program arguments
1129 * @return 0 on success, error code otherwise
1130 */
1131 int vnetd_main(Vnetd *vnetd){
1132 int err = 0;
1134 //dprintf(">\n");
1135 err = get_self_addr(&vnetd->addr);
1136 vnetd->addr.sin_port = vnetd->port;
1137 iprintf("> VNETD\n");
1138 iprintf("> addr=%s port=%u\n",
1139 inet_ntoa(vnetd->addr.sin_addr), htons(vnetd->port));
1140 iprintf("> mcaddr=%s port=%u\n",
1141 inet_ntoa(vnetd->mcast_addr.sin_addr), htons(vnetd->port));
1142 iprintf("> peers port=%u ", htons(vnetd->peer_port));
1143 objprint(iostdout, vnetd->peers, 0); printf("\n");
1145 err = vcache_init();
1146 err = vnetd_peers(vnetd);
1148 catch_signal(SIGCHLD,sigaction_SIGCHLD);
1149 catch_signal(SIGPIPE,sigaction_SIGPIPE);
1150 catch_signal(SIGALRM,sigaction_SIGALRM);
1151 err = vnetd_listen_conn(vnetd, &vnetd->listen_conn);
1152 if(err < 0) goto exit;
1153 err = vnetd_udp_conn(vnetd, &vnetd->udp_conn);
1154 if(err < 0) goto exit;
1155 err = vnetd_broadcast_conn(vnetd, &vnetd->bcast_conn);
1156 if(err < 0) goto exit;
1158 int flags = (VSOCK_BROADCAST | VSOCK_MULTICAST);
1159 uint32_t mcaddr = vnetd->mcast_addr.sin_addr.s_addr;
1161 err = vnetd_raw_socket(IPPROTO_ETHERIP, flags, mcaddr, &vnetd->etherip_sock);
1162 if(err < 0) goto exit;
1163 err = vnetd_raw_socket(IPPROTO_ESP, flags, mcaddr, &vnetd->esp_sock);
1164 if(err < 0) goto exit;
1166 err = vnetd_select(vnetd);
1167 exit:
1168 Conn_close(vnetd->listen_conn);
1169 Conn_close(vnetd->udp_conn);
1170 Conn_close(vnetd->bcast_conn);
1171 connections_close_all(vnetd);
1172 close(vnetd->etherip_sock);
1173 close(vnetd->esp_sock);
1174 //dprintf("< err=%d\n", err);
1175 return err;
1178 /** Parse command-line arguments and call the vnetd main program.
1180 * @param arg argument count
1181 * @param argv arguments
1182 * @return 0 on success, 1 otherwise
1183 */
1184 extern int main(int argc, char *argv[]){
1185 int err = 0;
1186 int key = 0;
1187 int long_index = 0;
1189 vnetd_set_defaults(vnetd);
1190 while(1){
1191 key = getopt_long(argc, argv, short_opts, long_opts, &long_index);
1192 if(key == -1) break;
1193 switch(key){
1194 case OPT_ADDR:{
1195 unsigned long addr;
1196 err = get_host_address(optarg, &addr);
1197 if(err) goto exit;
1198 vnetd->mcast_addr.sin_addr.s_addr = addr;
1199 break; }
1200 case OPT_PORT:
1201 err = convert_service_to_port(optarg, &vnetd->port);
1202 if(err) goto exit;
1203 break;
1204 case OPT_PEER:{
1205 unsigned long addr;
1206 err = get_host_address(optarg, &addr);
1207 if(err) goto exit;
1208 //cons_push(&vnetd->peers, mkaddress(addr));
1209 cons_push(&vnetd->peers, mkint(addr));
1210 break; }
1211 case OPT_HELP:
1212 usage(0);
1213 break;
1214 case OPT_VERBOSE:
1215 vnetd->verbose = TRUE;
1216 break;
1217 case OPT_VERSION:
1218 iprintf("> %s %s\n", PROGRAM, VERSION);
1219 exit(0);
1220 break;
1221 default:
1222 usage(EINVAL);
1223 break;
1226 err = vnetd_main(vnetd);
1227 exit:
1228 if(err && key > 0){
1229 eprintf("> Error in arg %c\n", key);
1231 return (err ? 1 : 0);