ia64/xen-unstable

view tools/vnet/vnetd/vnetd.c @ 8740:3d7ea7972b39

Update patches for linux 2.6.15.

Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Thu Feb 02 17:16:00 2006 +0000 (2006-02-02)
parents 06d84bf87159
children 71b0f00f6344
line source
1 /*
2 * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>.
3 *
4 * This library is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as
6 * published by the Free Software Foundation; either version 2.1 of the
7 * License, or (at your option) any later version. This library is
8 * distributed in the hope that it will be useful, but WITHOUT ANY
9 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE.
11 * See the GNU Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this library; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17 /** @file
18 *
19 * Vnetd tcp messages:
20 *
21 * - varp request: request care-of-addr for a vif.
22 * If know answer, reply. If not broadcast locally.
23 *
24 * - varp announce: reply to a varp request.
25 * If a (local) request is pending, remember and broadcast locally.
26 *
27 * - vnet subscribe: indicate there are local vifs in a vnet (use varp announce?).
28 *
29 * - vnet forward: tunneled broadcast packet to rebroadcast.
30 * Broadcast locally (if there are vifs in the vnet).
31 *
32 *
33 * Vnetd udp messages (varp):
34 *
35 * - local varp request:
36 * If know and vif is non-local, reply.
37 * If know and vif is local, do nothing (but announce will reset).
38 * If have entry saying is local and no-one answers - remove (? or rely on entry timeout).
39 * If don't know and there is no (quick) local reply, forward to peers.
40 *
41 * - remote varp request:
42 * If know, reply.
43 * If don't know, query locally (and queue request).
44 *
45 * - varp announce: remember and adjust vnet subscriptions.
46 * Forward to peers if a request is pending.
47 *
48 * Vnetd broadcast messages (tunneling):
49 *
50 * - etherip: forward to peers (on the right vnets)
51 *
52 * - esp: forward to peers (on the right vnets)
53 *
54 *
55 * For etherip can tell the vnet from the header (in clear).
56 * But for esp can't. So should use mcast to define? Or always some clear header?
57 *
58 * Make ssl on tcp connections optional.
59 *
60 * So far have been assuming esp for security.
61 * But could use vnetd to forward and use ssl on the connection.
62 * But has usual probs with efficiency.
63 * However, should 'just work' if the coa for the vif has been set
64 * to the vnetd. How? Vnetd configured to act as gateway for
65 * some peers? Then would rewrite varp announce to itself and forward
66 * traffic to peer.
67 *
68 * Simplify - make each vnetd have one peer?
69 * If need to link more subnets, add vnetds?
70 *
71 * Need requests table for each tcp conn (incoming).
72 * - entries we want to resolve (and fwd the answer).
73 *
74 * Need requests table for the udp socket.
75 * - entries we want to resolve (and return the answer).
76 *
77 * Need table of entries we know.
78 * - from caching local announce
79 * - from caching announce reply to forwarded request
80 *
81 * Problem with replying to requests from the cache - if the cache
82 * is out of date we reply with incorrect data. So if a VM migrates
83 * we will advertise the old location until it times out.
84 *
85 * So should probably not reply out of the cache at all - but always
86 * query for the answer. Could query direct to old location if
87 * entry is valid the first time, and broadcast if no reply in timeout.
88 * Causes delay if migrated - may as well broadcast.
89 *
90 * Need to watch out for query loops. If have 3 vnetds A,B,C and
91 * A gets a query, forwards to B and C. B forwards to C, which
92 * forwards to A, and on forever. So if have an entry that has been
93 * probed, do not forward it when get another query for it.
94 *
95 * @author Mike Wray <mike.wray@hpl.hp.com>
96 */
99 #include <stdlib.h>
100 #include <unistd.h>
101 #include <stdio.h>
102 #include <getopt.h>
103 #include <errno.h>
104 #include <sys/types.h>
105 #include <time.h>
106 #include <sys/socket.h>
107 #include <netinet/in.h>
108 #include <arpa/inet.h>
109 #include <string.h>
111 #include <signal.h>
112 #include <sys/wait.h>
113 #include <sys/select.h>
115 #include <linux/ip.h> // For struct iphdr;
117 #include <linux/if_ether.h>
118 #include "if_etherip.h"
119 #include "if_varp.h"
121 #include "allocate.h"
123 #include "vnetd.h"
124 #include "file_stream.h"
125 #include "string_stream.h"
126 #include "socket_stream.h"
127 #include "sys_net.h"
129 #include "enum.h"
130 #include "sxpr.h"
132 #include "marshal.h"
133 #include "connection.h"
134 #include "select.h"
135 #include "timer.h"
136 #include "vcache.h"
138 int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, Conn **val);
140 #ifndef TRUE
141 #define TRUE 1
142 #endif
144 #ifndef FALSE
145 #define FALSE 0
146 #endif
/** Socket flags.
 * Bit mask values combined in the 'flags' argument of create_socket()
 * and vnetd_raw_socket().
 */
enum {
    VSOCK_REUSE     = 1 << 0,  /* set SO_REUSEADDR */
    VSOCK_BIND      = 1 << 1,  /* bind() the socket */
    VSOCK_CONNECT   = 1 << 2,  /* connect() the socket */
    VSOCK_BROADCAST = 1 << 3,  /* set SO_BROADCAST */
    VSOCK_MULTICAST = 1 << 4,  /* join the multicast group */
};
157 #define PROGRAM "vnetd"
158 #define VERSION "0.1"
160 #define MODULE_NAME PROGRAM
161 #define DEBUG
162 #undef DEBUG
163 #include "debug.h"
165 #define OPT_PORT 'p'
166 #define KEY_PORT "port"
167 #define DOC_PORT "<port>\n\t" PROGRAM " UDP port (as a number or service name)"
169 #define OPT_ADDR 'm'
170 #define KEY_ADDR "mcaddr"
171 #define DOC_ADDR "<address>\n\t" PROGRAM " multicast address"
173 #define OPT_PEER 'r'
174 #define KEY_PEER "peer"
175 #define DOC_PEER "<peer>\n\t Peer " PROGRAM " to connect to (IP address or hostname)"
177 #define OPT_FILE 'f'
178 #define KEY_FILE "file"
179 #define DOC_FILE "<file>\n\t Configuration file to load"
181 #define OPT_CTRL 'c'
182 #define KEY_CTRL "control"
183 #define DOC_CTRL "<port>\n\t " PROGRAM " control port (as a number or service name)"
185 #define OPT_HELP 'h'
186 #define KEY_HELP "help"
187 #define DOC_HELP "\n\tprint help"
189 #define OPT_VERSION 'v'
190 #define KEY_VERSION "version"
191 #define DOC_VERSION "\n\tprint version"
193 #define OPT_VERBOSE 'V'
194 #define KEY_VERBOSE "verbose"
195 #define DOC_VERBOSE "\n\tverbose flag"
197 /** Print a usage message.
198 * Prints to stdout if err is zero, and exits with 0.
199 * Prints to stderr if err is non-zero, and exits with 1.
200 *
201 * @param err error code
202 */
203 static void usage(int err){
204 FILE *out = (err ? stderr : stdout);
206 fprintf(out, "Usage: %s [options]\n", PROGRAM);
207 fprintf(out, "-%c, --%s %s\n", OPT_ADDR, KEY_ADDR, DOC_ADDR);
208 fprintf(out, "-%c, --%s %s\n", OPT_PORT, KEY_PORT, DOC_PORT);
209 fprintf(out, "-%c, --%s %s\n", OPT_PEER, KEY_PEER, DOC_PEER);
210 fprintf(out, "-%c, --%s %s\n", OPT_VERBOSE, KEY_VERBOSE, DOC_VERBOSE);
211 fprintf(out, "-%c, --%s %s\n", OPT_VERSION, KEY_VERSION, DOC_VERSION);
212 fprintf(out, "-%c, --%s %s\n", OPT_HELP, KEY_HELP, DOC_HELP);
213 exit(err ? 1 : 0);
214 }
216 /** Short options. Options followed by ':' take an argument. */
217 static char *short_opts = (char[]){
218 OPT_ADDR, ':',
219 OPT_PORT, ':',
220 OPT_PEER, ':',
221 OPT_HELP,
222 OPT_VERSION,
223 OPT_VERBOSE,
224 0 };
226 /** Long options. */
227 static struct option const long_opts[] = {
228 { KEY_ADDR, required_argument, NULL, OPT_ADDR },
229 { KEY_PORT, required_argument, NULL, OPT_PORT },
230 { KEY_PEER, required_argument, NULL, OPT_PEER },
231 { KEY_HELP, no_argument, NULL, OPT_HELP },
232 { KEY_VERSION, no_argument, NULL, OPT_VERSION },
233 { KEY_VERBOSE, no_argument, NULL, OPT_VERBOSE },
234 { NULL, 0, NULL, 0 }
235 };
/** Look up the address of the host vnetd is running on, so that
 * broadcast traffic we sent ourselves can be recognised and ignored.
 *
 * Only addr->sin_addr is written; the other fields are left alone.
 *
 * @param addr where to store the address
 * @return 0 on success, the gethostname() result if that call fails,
 *         -ENOENT if get_host_address() reports 0 for the host name
 */
int get_self_addr(struct sockaddr_in *addr){
    char name[1024] = {};
    unsigned long host_addr;
    int rc;

    /* One byte is held back so the name stays NUL-terminated. */
    rc = gethostname(name, sizeof(name) - 1);
    if(rc){
        return rc;
    }
    /* A zero result from get_host_address() is treated as lookup failure. */
    if(get_host_address(name, &host_addr) == 0){
        return -ENOENT;
    }
    addr->sin_addr.s_addr = host_addr;
    return 0;
}
260 /** Marshal a message.
261 *
262 * @param io destination
263 * @param msg message
264 * @return number of bytes written, or negative error code
265 */
266 int VnetMsg_marshal(IOStream *io, VnetMsg *msg){
267 int err = 0;
268 int hdr_n = sizeof(VnetMsgHdr);
270 err = marshal_uint16(io, msg->hdr.id);
271 if(err < 0) goto exit;
272 err = marshal_uint16(io, msg->hdr.opcode);
273 if(err < 0) goto exit;
274 switch(msg->hdr.id){
275 case VNET_VARP_ID:
276 err = marshal_bytes(io, ((char*)msg) + hdr_n, sizeof(VarpHdr) - hdr_n);
277 break;
278 case VNET_FWD_ID:
279 err = marshal_uint16(io, msg->fwd.protocol);
280 if(err < 0) goto exit;
281 err = marshal_uint16(io, msg->fwd.len);
282 if(err < 0) goto exit;
283 err = marshal_bytes(io, msg->fwd.data, msg->fwd.len);
284 break;
285 default:
286 err = -EINVAL;
287 break;
288 }
289 exit:
290 return err;
291 }
293 /** Unmarshal a message.
294 *
295 * @param io source
296 * @param msg message to unmarshal into
297 * @return number of bytes read, or negative error code
298 */
299 int VnetMsg_unmarshal(IOStream *io, VnetMsg *msg){
300 int err = 0;
301 int hdr_n = sizeof(VnetMsgHdr);
303 dprintf("> id\n");
304 err = unmarshal_uint16(io, &msg->hdr.id);
305 if(err < 0) goto exit;
306 dprintf("> opcode\n");
307 err = unmarshal_uint16(io, &msg->hdr.opcode);
308 if(err < 0) goto exit;
309 switch(msg->hdr.id){
310 case VNET_VARP_ID:
311 msg->hdr.opcode = htons(msg->hdr.opcode);
312 dprintf("> varp hdr_n=%d varphdr=%d\n", hdr_n, sizeof(VarpHdr));
313 err = unmarshal_bytes(io, ((char*)msg) + hdr_n, sizeof(VarpHdr) - hdr_n);
314 break;
315 case VNET_FWD_ID:
316 dprintf("> forward\n");
317 err = unmarshal_uint16(io, &msg->fwd.protocol);
318 if(err < 0) goto exit;
319 dprintf("> forward len\n");
320 err = unmarshal_uint16(io, &msg->fwd.len);
321 if(err < 0) goto exit;
322 dprintf("> forward bytes\n");
323 err = unmarshal_bytes(io, msg->fwd.data, msg->fwd.len);
324 break;
325 default:
326 wprintf("> Invalid id %d\n", msg->hdr.id);
327 err = -EINVAL;
328 break;
329 }
330 exit:
331 dprintf("< err=%d \n", err);
332 return err;
333 }
335 Vnetd _vnetd = {};
336 Vnetd *vnetd = &_vnetd;
/** Counter for timer alarms.
 * Incremented by the SIGALRM handler and read and reset by the select loop,
 * so it has to be a type that can be accessed safely from a signal handler.
 */
static volatile sig_atomic_t timer_alarms = 0;
342 /** Set vnetd defaults.
343 *
344 * @param vnetd vnetd
345 */
346 void vnetd_set_defaults(Vnetd *vnetd){
347 *vnetd = (Vnetd){};
348 vnetd->port = htons(VNETD_PORT);
349 vnetd->peer_port = vnetd->port; //htons(VNETD_PEER_PORT);
350 vnetd->verbose = FALSE;
351 vnetd->peers = ONULL;
352 vnetd->mcast_addr.sin_addr.s_addr = VARP_MCAST_ADDR;
353 vnetd->mcast_addr.sin_port = vnetd->port;
354 }
356 uint32_t vnetd_mcast_addr(Vnetd *vnetd){
357 return vnetd->mcast_addr.sin_addr.s_addr;
358 }
360 uint16_t vnetd_mcast_port(Vnetd *vnetd){
361 return vnetd->mcast_addr.sin_port;
362 }
364 /** Add a connection to a peer.
365 *
366 * @param vnetd vnetd
367 * @param conn connection
368 */
369 void connections_add(Vnetd *vnetd, Conn *conn){
370 vnetd->connections = ConnList_add(conn, vnetd->connections);
371 }
373 /** Delete a connection to a peer.
374 *
375 * @param vnetd vnetd
376 * @param conn connection
377 */
378 void connections_del(Vnetd *vnetd, Conn *conn){
379 ConnList *prev, *curr, *next;
380 for(prev = NULL, curr = vnetd->connections; curr; prev = curr, curr = next){
381 next = curr->next;
382 if(curr->conn == conn){
383 if(prev){
384 prev->next = curr->next;
385 } else {
386 vnetd->connections = curr->next;
387 }
388 }
389 }
390 }
392 /** Close all connections to peers.
393 *
394 * @param vnetd vnetd
395 */
396 void connections_close_all(Vnetd *vnetd){
397 ConnList *l;
398 for(l = vnetd->connections; l; l = l->next){
399 Conn_close(l->conn);
400 }
401 vnetd->connections = NULL;
402 }
404 /** Add peer connections to a select set.
405 *
406 * @param vnetd vnetd
407 * @param set select set
408 */
409 void connections_select(Vnetd *vnetd, SelectSet *set){
410 ConnList *l;
411 for(l = vnetd->connections; l; l = l->next){
412 SelectSet_add_read(set, l->conn->sock);
413 }
414 }
416 /** Handle peer connections according to a select set.
417 *
418 * @param vnetd vnetd
419 * @param set indicates ready connections
420 */
421 void connections_handle(Vnetd *vnetd, SelectSet *set){
422 ConnList *prev, *curr, *next;
423 Conn *conn;
424 for(prev = NULL, curr = vnetd->connections; curr; prev = curr, curr = next){
425 next = curr->next;
426 conn = curr->conn;
427 if(FD_ISSET(conn->sock, &set->rd)){
428 int conn_err;
429 conn_err = Conn_handle(conn);
430 if(conn_err){
431 if(prev){
432 prev->next = curr->next;
433 } else {
434 vnetd->connections = curr->next;
435 }
436 }
437 }
438 }
439 }
441 /** Forward a message from a peer onto the local subnet.
442 *
443 * @param vnetd vnetd
444 * @param vmsg message
445 * @return 0 on success, error code otherwise
446 */
447 int vnetd_forward_local(Vnetd *vnetd, VnetMsg *vmsg){
448 int err = 0;
449 int sock = 0;
450 struct sockaddr_in addr_in;
451 struct sockaddr *addr = (struct sockaddr *)&addr_in;
452 socklen_t addr_n = sizeof(addr_in);
454 dprintf(">\n");
455 switch(vmsg->fwd.protocol){
456 case IPPROTO_ESP:
457 dprintf("> ESP\n");
458 sock = vnetd->esp_sock; break;
459 case IPPROTO_ETHERIP:
460 dprintf("> Etherip\n");
461 sock = vnetd->etherip_sock; break;
462 default:
463 err = -EINVAL;
464 goto exit;
465 }
466 addr_in.sin_family = AF_INET;
467 addr_in.sin_addr = vnetd->mcast_addr.sin_addr;
468 addr_in.sin_port = htons(vmsg->fwd.protocol);
469 dprintf("> send dst=%s protocol=%d len=%d\n",
470 inet_ntoa(addr_in.sin_addr), vmsg->fwd.protocol, vmsg->fwd.len);
471 err = sendto(sock, vmsg->fwd.data, vmsg->fwd.len, 0, addr, addr_n);
472 exit:
473 dprintf("< err=%d\n", err);
474 return err;
475 }
477 /** Forward a message to a peer.
478 *
479 * @param conn peer connection
480 * @param protocol message protocol
481 * @param data message data
482 * @param data_n message size
483 * @return 0 on success, error code otherwise
484 */
485 int vnetd_forward_peer(Conn *conn, int protocol, void *data, int data_n){
486 int err = 0;
487 IOStream _io, *io = &_io;
488 StringData sdata;
489 char buf[1600];
491 dprintf("> addr=%s protocol=%d n=%d\n",
492 inet_ntoa(conn->addr.sin_addr), protocol, data_n);
493 string_stream_init(io, &sdata, buf, sizeof(buf));
494 err = marshal_uint16(io, VNET_FWD_ID);
495 if(err < 0) goto exit;
496 err = marshal_uint16(io, 0);
497 if(err < 0) goto exit;
498 err = marshal_uint16(io, protocol);
499 if(err < 0) goto exit;
500 err = marshal_uint16(io, data_n);
501 if(err < 0) goto exit;
502 err = marshal_bytes(io, data, data_n);
503 if(err < 0) goto exit;
504 err = IOStream_write(conn->out, buf, IOStream_get_written(io));
505 IOStream_flush(conn->out);
506 exit:
507 if(err < 0) perror(__FUNCTION__);
508 dprintf("< err=%d\n", err);
509 return err;
510 }
512 /** Forward a message to all peers.
513 *
514 * @param vnetd vnetd
515 * @param protocol message protocol
516 * @param data message data
517 * @param data_n message size
518 * @return 0 on success, error code otherwise
519 */
520 int vnetd_forward_peers(Vnetd *vnetd, int protocol, void *data, int data_n){
521 int err = 0;
522 ConnList *curr, *next;
524 dprintf(">\n");
525 for(curr = vnetd->connections; curr; curr = next){
526 next = curr->next;
527 vnetd_forward_peer(curr->conn, protocol, data, data_n);
528 }
529 dprintf("< err=%d\n", err);
530 return err;
531 }
533 /** Handler for a peer connection.
534 * Reads a VnetMsg from the connection and handles it.
535 *
536 * @param conn peer connection
537 * @return 0 on success, error code otherwise
538 */
539 int conn_handle_fn(Conn *conn){
540 int err = 0;
541 VnetMsg *vmsg = ALLOCATE(VnetMsg);
542 IPMessage *msg = NULL;
544 dprintf("> addr=%s port=%u\n",
545 inet_ntoa(conn->addr.sin_addr),
546 ntohs(conn->addr.sin_port));
547 err = VnetMsg_unmarshal(conn->in, vmsg);
548 if(err < 0){
549 wprintf("> Unmarshal error %d\n", err);
550 goto exit;
551 }
552 switch(vmsg->hdr.id){
553 case VNET_VARP_ID:
554 dprintf("> Got varp message\n");
555 msg = ALLOCATE(IPMessage);
556 msg->conn = conn;
557 msg->saddr = conn->addr;
558 msg->data = vmsg;
559 err = vcache_handle_message(msg, 0);
560 err = 0;
561 break;
562 case VNET_FWD_ID:
563 dprintf("> Got forward message\n");
564 err = vnetd_forward_local(vnetd, vmsg);
565 err = 0;
566 break;
567 default:
568 wprintf("> Invalid id=%d\n", vmsg->hdr.id);
569 err = -EINVAL;
570 break;
571 }
572 exit:
573 dprintf("< err=%d\n", err);
574 return err;
575 }
577 /** Accept an incoming tcp connection from a peer vnetd.
578 *
579 * @param sock tcp socket
580 * @return 0 on success, error code otherwise
581 */
582 int vnetd_accept(Vnetd *vnetd, Conn *conn){
583 Conn *new_conn = NULL;
584 struct sockaddr_in peer_in;
585 struct sockaddr *peer = (struct sockaddr *)&peer_in;
586 socklen_t peer_n = sizeof(peer_in);
587 int peersock;
588 int err = 0;
590 //dprintf(">\n");
591 new_conn = Conn_new(conn_handle_fn, vnetd);
592 //dprintf("> accept...\n");
593 peersock = accept(conn->sock, peer, &peer_n);
594 //dprintf("> accept=%d\n", peersock);
595 if(peersock < 0){
596 perror("accept");
597 err = -errno;
598 goto exit;
599 }
600 iprintf("> Accepted connection from %s:%d\n",
601 inet_ntoa(peer_in.sin_addr), htons(peer_in.sin_port));
602 err = Conn_init(new_conn, peersock, SOCK_STREAM, peer_in);
603 if(err) goto exit;
604 connections_add(vnetd, new_conn);
605 exit:
606 if(err){
607 Conn_close(new_conn);
608 }
609 if(err < 0) wprintf("< err=%d\n", err);
610 return err;
611 }
613 /** Connect to a peer vnetd.
614 *
615 * @param vnetd vnetd
616 * @param addr address
617 * @param port port
618 * @return 0 on success, error code otherwise
619 */
620 int vnetd_connect(Vnetd *vnetd, struct in_addr addr, uint16_t port){
621 Conn *conn = NULL;
622 int err = 0;
624 //dprintf(">\n");
625 conn = Conn_new(conn_handle_fn, vnetd);
626 err = Conn_connect(conn, SOCK_STREAM, addr, port);
627 if(err) goto exit;
628 connections_add(vnetd, conn);
629 exit:
630 if(err){
631 Conn_close(conn);
632 }
633 //dprintf(" < err=%d\n", err);
634 return err;
635 }
637 /** Handle a message on the udp socket.
638 * Expecting to see VARP messages only.
639 *
640 * @param sock udp socket
641 * @return 0 on success, error code otherwise
642 */
643 int vnetd_handle_udp(Vnetd *vnetd, Conn *conn){
644 int err = 0, rcv = 0;
645 struct sockaddr_in self_in;
646 struct sockaddr_in peer_in;
647 struct sockaddr *peer = (struct sockaddr *)&peer_in;
648 socklen_t peer_n = sizeof(peer_in);
649 VnetMsg *vmsg = NULL;
650 void *data;
651 int data_n;
652 int flags = 0;
653 IPMessage *msg = NULL;
655 //dprintf(">\n");
656 self_in = vnetd->addr;
657 vmsg = ALLOCATE(VnetMsg);
658 data = &vmsg->varp.varph;
659 data_n = sizeof(VarpHdr);
660 rcv = recvfrom(conn->sock, data, data_n, flags, peer, &peer_n);
661 if(rcv < 0){
662 err = rcv;
663 goto exit;
664 }
665 dprintf("> Received %d bytes from %s:%d\n",
666 rcv, inet_ntoa(peer_in.sin_addr), htons(peer_in.sin_port));
667 if(rcv != data_n){
668 err = -EINVAL;
669 goto exit;
670 }
671 if(peer_in.sin_addr.s_addr == self_in.sin_addr.s_addr){
672 //dprintf("> Ignoring message from self.\n");
673 goto exit;
674 }
675 msg = ALLOCATE(IPMessage);
676 msg->conn = conn;
677 msg->saddr = peer_in;
678 msg->data = vmsg;
680 err = vcache_handle_message(msg, 1);
681 exit:
682 //dprintf("< err=%d\n", err);
683 return err;
684 }
686 /** Handle a message on a raw socket.
687 * Only deals with etherip and esp.
688 * Forwards messages to peers.
689 *
690 * @param vnetd vnetd
691 * @param sock socket
692 * @param protocol protocol
693 * @return 0 on success, error code otherwise
694 */
695 int vnetd_handle_protocol(Vnetd *vnetd, int sock, int protocol){
696 int err = 0, rcv = 0;
697 struct sockaddr_in self_in;
698 struct sockaddr_in peer_in;
699 struct sockaddr *peer = (struct sockaddr *)&peer_in;
700 socklen_t peer_n = sizeof(peer_in);
701 uint8_t buf[VNET_FWD_MAX];
702 int buf_n = sizeof(buf);
703 char *data, *end;
704 int flags = 0;
705 struct iphdr *iph = NULL;
707 //dprintf(">\n");
708 self_in = vnetd->addr;
709 rcv = recvfrom(sock, buf, buf_n, flags, peer, &peer_n);
710 if(rcv < 0){
711 err = rcv;
712 goto exit;
713 }
714 dprintf("> Received %d bytes from %s protocol=%d\n",
715 rcv, inet_ntoa(peer_in.sin_addr), protocol);
716 if(rcv < sizeof(struct iphdr)){
717 wprintf("> Message too short for IP header\n");
718 err = -EINVAL;
719 goto exit;
720 }
721 if(peer_in.sin_addr.s_addr == self_in.sin_addr.s_addr){
722 dprintf("> Ignoring message from self.\n");
723 goto exit;
724 }
725 data = buf;
726 end = buf + rcv;
727 iph = (void*)data;
728 data += (iph->ihl << 2);
729 vnetd_forward_peers(vnetd, protocol, data, end - data);
730 exit:
731 //dprintf("< err=%d\n", err);
732 return err;
733 }
735 /** Socket select loop.
736 * Accepts connections on the tcp socket and handles
737 * messages on the other sockets.
738 *
739 * @return 0 on success, error code otherwise
740 */
741 int vnetd_select(Vnetd *vnetd){
742 int err = 0;
743 SelectSet set = {};
744 while(1){
745 SelectSet_zero(&set);
746 SelectSet_add_read(&set, vnetd->udp_conn->sock);
747 SelectSet_add_read(&set, vnetd->bcast_conn->sock);
748 SelectSet_add_read(&set, vnetd->etherip_sock);
749 SelectSet_add_read(&set, vnetd->esp_sock);
750 SelectSet_add_read(&set, vnetd->listen_conn->sock);
751 connections_select(vnetd, &set);
752 err = SelectSet_select(&set, NULL);
753 if(err == 0) continue;
754 if(err < 0){
755 if(errno == EINTR){
756 if(timer_alarms){
757 timer_alarms = 0;
758 process_timers();
759 }
760 continue;
761 }
762 perror("select");
763 goto exit;
764 }
765 if(FD_ISSET(vnetd->udp_conn->sock, &set.rd)){
766 vnetd_handle_udp(vnetd, vnetd->udp_conn);
767 }
768 if(FD_ISSET(vnetd->bcast_conn->sock, &set.rd)){
769 vnetd_handle_udp(vnetd, vnetd->bcast_conn);
770 }
771 if(FD_ISSET(vnetd->etherip_sock, &set.rd)){
772 vnetd_handle_protocol(vnetd, vnetd->etherip_sock, IPPROTO_ETHERIP);
773 }
774 if(FD_ISSET(vnetd->esp_sock, &set.rd)){
775 vnetd_handle_protocol(vnetd, vnetd->esp_sock, IPPROTO_ESP);
776 }
777 connections_handle(vnetd, &set);
778 if(FD_ISSET(vnetd->listen_conn->sock, &set.rd)){
779 vnetd_accept(vnetd, vnetd->listen_conn);
780 }
781 }
782 exit:
783 return err;
784 }
/** Set the SO_REUSEADDR option on a socket.
 *
 * @param sock socket
 * @param reuse option value
 * @return 0 on success, -errno on failure (a message is also printed)
 */
int setsock_reuse(int sock, int reuse){
    int rc = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse));

    if(rc >= 0){
        return rc;
    }
    /* Capture errno before perror() has a chance to change it. */
    rc = -errno;
    perror("setsockopt SO_REUSEADDR");
    return rc;
}
/** Set the SO_BROADCAST option on a socket.
 *
 * @param sock socket
 * @param bcast option value
 * @return 0 on success, -errno on failure (a message is also printed)
 */
int setsock_broadcast(int sock, int bcast){
    int rc = setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &bcast, sizeof(bcast));

    if(rc >= 0){
        return rc;
    }
    /* Capture errno before perror() has a chance to change it. */
    rc = -errno;
    perror("setsockopt SO_BROADCAST");
    return rc;
}
810 /** Join a socket to a multicast group.
811 */
812 int setsock_multicast(int sock, uint32_t saddr){
813 int err = 0;
814 struct ip_mreqn mreq = {};
815 int mloop = 0;
816 // See 'man 7 ip' for these options.
817 mreq.imr_multiaddr.s_addr = saddr; // IP multicast address.
818 mreq.imr_address = vnetd->addr.sin_addr; // Interface IP address.
819 mreq.imr_ifindex = 0; // Interface index (0 means any).
820 err = setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &mloop, sizeof(mloop));
821 if(err < 0){
822 err = -errno;
823 perror("setsockopt IP_MULTICAST_LOOP");
824 goto exit;
825 }
826 err = setsockopt(sock, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
827 if(err < 0){
828 err = -errno;
829 perror("setsockopt IP_ADD_MEMBERSHIP");
830 goto exit;
831 }
832 exit:
833 return err;
834 }
/** Set the multicast ttl of a socket (the default is 1).
 *
 * @param sock socket
 * @param ttl time-to-live for outgoing multicast packets
 * @return 0 on success, -errno on failure (a message is also printed)
 */
int setsock_multicast_ttl(int sock, uint8_t ttl){
    int rc = setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl));

    if(rc >= 0){
        return rc;
    }
    /* Capture errno before perror() has a chance to change it. */
    rc = -errno;
    perror("setsockopt IP_MULTICAST_TTL");
    return rc;
}
849 char * socket_flags(int flags){
850 static char s[6];
851 int i = 0;
852 s[i++] = (flags & VSOCK_CONNECT ? 'c' : '-');
853 s[i++] = (flags & VSOCK_BIND ? 'b' : '-');
854 s[i++] = (flags & VSOCK_REUSE ? 'r' : '-');
855 s[i++] = (flags & VSOCK_BROADCAST ? 'B' : '-');
856 s[i++] = (flags & VSOCK_MULTICAST ? 'M' : '-');
857 s[i++] = '\0';
858 return s;
859 }
861 /** Create a socket.
862 * The flags can include VSOCK_REUSE, VSOCK_BROADCAST, VSOCK_CONNECT.
863 *
864 * @param socktype socket type
865 * @param saddr address
866 * @param port port
867 * @param flags flags
868 * @param val return value for the socket connection
869 * @return 0 on success, error code otherwise
870 */
871 int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, Conn **val){
872 int err = 0;
873 int sock = 0;
874 struct sockaddr_in addr_in;
875 struct sockaddr *addr = (struct sockaddr *)&addr_in;
876 socklen_t addr_n = sizeof(addr_in);
877 Conn *conn = NULL;
878 int reuse, bcast;
880 //dprintf(">\n");
881 reuse = (flags & VSOCK_REUSE);
882 bcast = (flags & VSOCK_BROADCAST);
883 addr_in.sin_family = AF_INET;
884 addr_in.sin_addr.s_addr = saddr;
885 addr_in.sin_port = port;
886 dprintf("> flags=%s addr=%s port=%d\n", socket_flags(flags),
887 inet_ntoa(addr_in.sin_addr), ntohs(addr_in.sin_port));
889 sock = socket(AF_INET, socktype, 0);
890 if(sock < 0){
891 err = -errno;
892 goto exit;
893 }
894 if(reuse){
895 err = setsock_reuse(sock, reuse);
896 if(err < 0) goto exit;
897 }
898 if(bcast){
899 err = setsock_broadcast(sock, bcast);
900 if(err < 0) goto exit;
901 }
902 if(flags & VSOCK_MULTICAST){
903 err = setsock_multicast(sock, saddr);
904 if(err < 0) goto exit;
905 }
906 if(flags & VSOCK_CONNECT){
907 err = connect(sock, addr, addr_n);
908 if(err < 0){
909 err = -errno;
910 perror("connect");
911 goto exit;
912 }
913 }
914 if(flags & VSOCK_BIND){
915 err = bind(sock, addr, addr_n);
916 if(err < 0){
917 err = -errno;
918 perror("bind");
919 goto exit;
920 }
921 }
922 conn = Conn_new(NULL, NULL);
923 Conn_init(conn, sock, socktype, addr_in);
924 {
925 struct sockaddr_in self = {};
926 socklen_t self_n;
927 getsockname(conn->sock, (struct sockaddr *)&self, &self_n);
928 dprintf("> sockname sock=%d addr=%s port=%d\n",
929 conn->sock, inet_ntoa(self.sin_addr), ntohs(self.sin_port));
930 }
931 exit:
932 *val = (err ? NULL : conn);
933 //dprintf("< err=%d\n", err);
934 return err;
935 }
937 /** Create the tcp listen socket.
938 *
939 * @param vnetd program arguments
940 * @param val return value for the socket
941 * @return 0 on success, error code otherwise
942 */
943 int vnetd_listen_conn(Vnetd *vnetd, Conn **val){
944 int err = 0;
945 int flags = VSOCK_BIND | VSOCK_REUSE;
946 //dprintf(">\n");
947 err = create_socket(SOCK_STREAM, INADDR_ANY, vnetd->peer_port, flags, val);
948 if(err) goto exit;
949 err = listen((*val)->sock, 5);
950 if(err < 0){
951 err = -errno;
952 perror("listen");
953 goto exit;
954 }
955 exit:
956 if(err && *val){
957 Conn_close(*val);
958 *val = NULL;
959 }
960 //dprintf("< err=%d\n", err);
961 return err;
962 }
964 /** Create the udp socket.
965 *
966 * @param vnetd program arguments
967 * @param val return value for the socket
968 * @return 0 on success, error code otherwise
969 */
970 int vnetd_udp_conn(Vnetd *vnetd, Conn **val){
971 int err = 0;
972 uint32_t addr = INADDR_ANY;
973 uint16_t port = vnetd->port;
974 int flags = (VSOCK_BIND | VSOCK_REUSE);
975 err = create_socket(SOCK_DGRAM, addr, port, flags, val);
976 return err;
977 }
979 /** Create the broadcast socket.
980 *
981 * @param vnetd program arguments
982 * @param val return value for the socket
983 * @return 0 on success, error code otherwise
984 */
985 int vnetd_broadcast_conn(Vnetd *vnetd, Conn **val){
986 int err = 0;
987 uint32_t addr = vnetd_mcast_addr(vnetd);
988 uint16_t port = vnetd_mcast_port(vnetd);
989 int flags = VSOCK_REUSE;
990 int multicast = IN_MULTICAST(ntohl(addr));
992 flags |= VSOCK_MULTICAST;
993 flags |= VSOCK_BROADCAST;
995 err = create_socket(SOCK_DGRAM, addr, port, flags, val);
996 if(err < 0) goto exit;
997 if(multicast){
998 err = setsock_multicast_ttl((*val)->sock, 1);
999 if(err < 0) goto exit;
1001 if(0){
1002 struct sockaddr * addr = (struct sockaddr *)&vnetd->addr;
1003 socklen_t addr_n = sizeof(vnetd->addr);
1004 dprintf("> sock=%d bind addr=%s:%d\n",
1005 (*val)->sock, inet_ntoa(vnetd->addr.sin_addr), ntohs(vnetd->addr.sin_port));
1006 err = bind((*val)->sock, addr, addr_n);
1007 if(err < 0){
1008 err = -errno;
1009 perror("bind");
1010 goto exit;
1013 if(0){
1014 struct sockaddr_in self = {};
1015 socklen_t self_n;
1016 getsockname((*val)->sock, (struct sockaddr *)&self, &self_n);
1017 dprintf("> sockname sock=%d addr=%s port=%d\n",
1018 (*val)->sock, inet_ntoa(self.sin_addr), ntohs(self.sin_port));
1020 exit:
1021 return err;
1024 /** Type for signal handling functions. */
1025 typedef void SignalAction(int code, siginfo_t *info, void *data);
1027 /** Handle SIGCHLD by getting child exit status.
1028 * This prevents child processes being defunct.
1030 * @param code signal code
1031 * @param info signal info
1032 * @param data
1033 */
1034 static void sigaction_SIGCHLD(int code, siginfo_t *info, void *data){
1035 int status;
1036 pid_t pid;
1037 pid = wait(&status);
1038 dprintf("> child pid=%d status=%d\n", pid, status);
1041 /** Handle SIGPIPE.
1043 * @param code signal code
1044 * @param info signal info
1045 * @param data
1046 */
1047 static void sigaction_SIGPIPE(int code, siginfo_t *info, void *data){
1048 dprintf("> SIGPIPE\n");
1051 /** Handle SIGALRM.
1053 * @param code signal code
1054 * @param info signal info
1055 * @param data
1056 */
1057 static void sigaction_SIGALRM(int code, siginfo_t *info, void *data){
1058 //dprintf("> SIGALRM\n");
1059 timer_alarms++;
1062 /** Install a handler for a signal.
1064 * @param signum signal
1065 * @param action handler
1066 * @return 0 on success, error code otherwise
1067 */
1068 static int catch_signal(int signum, SignalAction *action){
1069 int err = 0;
1070 struct sigaction sig = {};
1071 sig.sa_sigaction = action;
1072 sig.sa_flags = SA_SIGINFO;
1073 err = sigaction(signum, &sig, NULL);
1074 if(err){
1075 perror("sigaction");
1077 return err;
1080 /** Create a raw socket.
1082 * @param protocol protocol
1083 * @param flags flags
1084 * @param sock return value for the socket
1085 */
1086 int vnetd_raw_socket(int protocol, int flags, uint32_t mcaddr, int *sock){
1087 int err;
1088 int bcast = (flags & VSOCK_BROADCAST);
1089 //dprintf("> protocol=%d\n", protocol);
1090 err = *sock = socket(AF_INET, SOCK_RAW, protocol);
1091 if(err < 0){
1092 err = -errno;
1093 perror("socket");
1094 goto exit;
1096 if(bcast){
1097 err = setsock_broadcast(*sock, bcast);
1098 if(err < 0) goto exit;
1100 if(flags & VSOCK_MULTICAST){
1101 err = setsock_multicast(*sock, mcaddr);
1102 if(err < 0) goto exit;
1104 exit:
1105 //dprintf("< err=%d\n", err);
1106 return err;
1109 /** Connect to peer vnetds.
1111 * @param vnetd vnetd
1112 * @return 0 on success, error code otherwise
1113 */
1114 int vnetd_peers(Vnetd *vnetd){
1115 int err =0;
1116 Sxpr x, l;
1117 struct in_addr addr = {};
1118 for(l = vnetd->peers; CONSP(l); l = CDR(l)){
1119 x = CAR(l);
1120 addr.s_addr = OBJ_INT(x);
1121 vnetd_connect(vnetd, addr, vnetd->peer_port);
1123 return err;
1126 /** Vnet daemon main program.
1128 * @param vnetd program arguments
1129 * @return 0 on success, error code otherwise
1130 */
1131 int vnetd_main(Vnetd *vnetd){
1132 int err = 0;
1134 //dprintf(">\n");
1135 err = get_self_addr(&vnetd->addr);
1136 vnetd->addr.sin_port = vnetd->port;
1137 iprintf("> VNETD\n");
1138 iprintf("> addr=%s port=%u\n",
1139 inet_ntoa(vnetd->addr.sin_addr), htons(vnetd->port));
1140 iprintf("> mcaddr=%s port=%u\n",
1141 inet_ntoa(vnetd->mcast_addr.sin_addr), htons(vnetd->port));
1142 iprintf("> peers port=%u ", htons(vnetd->peer_port));
1143 objprint(iostdout, vnetd->peers, 0); printf("\n");
1145 err = vcache_init();
1146 err = vnetd_peers(vnetd);
1148 catch_signal(SIGCHLD,sigaction_SIGCHLD);
1149 catch_signal(SIGPIPE,sigaction_SIGPIPE);
1150 catch_signal(SIGALRM,sigaction_SIGALRM);
1151 err = vnetd_listen_conn(vnetd, &vnetd->listen_conn);
1152 if(err < 0) goto exit;
1153 err = vnetd_udp_conn(vnetd, &vnetd->udp_conn);
1154 if(err < 0) goto exit;
1155 err = vnetd_broadcast_conn(vnetd, &vnetd->bcast_conn);
1156 if(err < 0) goto exit;
1158 int flags = (VSOCK_BROADCAST | VSOCK_MULTICAST);
1159 uint32_t mcaddr = vnetd->mcast_addr.sin_addr.s_addr;
1161 err = vnetd_raw_socket(IPPROTO_ETHERIP, flags, mcaddr, &vnetd->etherip_sock);
1162 if(err < 0) goto exit;
1163 err = vnetd_raw_socket(IPPROTO_ESP, flags, mcaddr, &vnetd->esp_sock);
1164 if(err < 0) goto exit;
1166 err = vnetd_select(vnetd);
1167 exit:
1168 Conn_close(vnetd->listen_conn);
1169 Conn_close(vnetd->udp_conn);
1170 Conn_close(vnetd->bcast_conn);
1171 connections_close_all(vnetd);
1172 close(vnetd->etherip_sock);
1173 close(vnetd->esp_sock);
1174 //dprintf("< err=%d\n", err);
1175 return err;
1178 /** Parse command-line arguments and call the vnetd main program.
1180 * @param arg argument count
1181 * @param argv arguments
1182 * @return 0 on success, 1 otherwise
1183 */
1184 extern int main(int argc, char *argv[]){
1185 int err = 0;
1186 int key = 0;
1187 int long_index = 0;
1189 vnetd_set_defaults(vnetd);
1190 while(1){
1191 key = getopt_long(argc, argv, short_opts, long_opts, &long_index);
1192 if(key == -1) break;
1193 switch(key){
1194 case OPT_ADDR:{
1195 unsigned long addr;
1196 err = get_host_address(optarg, &addr);
1197 if(err) goto exit;
1198 vnetd->mcast_addr.sin_addr.s_addr = addr;
1199 break; }
1200 case OPT_PORT:
1201 err = convert_service_to_port(optarg, &vnetd->port);
1202 if(err) goto exit;
1203 break;
1204 case OPT_PEER:{
1205 unsigned long addr;
1206 err = get_host_address(optarg, &addr);
1207 if(err) goto exit;
1208 //cons_push(&vnetd->peers, mkaddress(addr));
1209 cons_push(&vnetd->peers, mkint(addr));
1210 break; }
1211 case OPT_HELP:
1212 usage(0);
1213 break;
1214 case OPT_VERBOSE:
1215 vnetd->verbose = TRUE;
1216 break;
1217 case OPT_VERSION:
1218 iprintf("> %s %s\n", PROGRAM, VERSION);
1219 exit(0);
1220 break;
1221 default:
1222 usage(EINVAL);
1223 break;
1226 err = vnetd_main(vnetd);
1227 exit:
1228 if(err && key > 0){
1229 eprintf("> Error in arg %c\n", key);
1231 return (err ? 1 : 0);