direct-io.hg

view tools/vnet/vnetd/vnetd.c @ 5517:10e9028c8e3d

bitkeeper revision 1.1718.1.10 (42b7b19aqOS_1M8I4pIOFjiTPYWV-g)

Merge bk://xenbits.xensource.com/xen-unstable.bk
into spot.cl.cam.ac.uk:C:/Documents and Settings/iap10/xen-unstable.bk
author iap10@spot.cl.cam.ac.uk
date Tue Jun 21 06:20:10 2005 +0000 (2005-06-21)
parents 0a4b76b6b5a0
children 6bad5eb72ce0 dfaf788ab18c
line source
1 /*
2 * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>.
3 *
4 * This library is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as
6 * published by the Free Software Foundation; either version 2.1 of the
7 * License, or (at your option) any later version. This library is
8 * distributed in the hope that it will be useful, but WITHOUT ANY
9 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE.
11 * See the GNU Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this library; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17 /** @file
18 *
19 * Vnetd tcp messages:
20 *
21 * - varp request: request care-of-addr for a vif.
22 * If know answer, reply. If not broadcast locally.
23 *
24 * - varp announce: reply to a varp request.
25 * If a (local) request is pending, remember and broadcast locally.
26 *
27 * - vnet subscribe: indicate there are local vifs in a vnet (use varp announce?).
28 *
29 * - vnet forward: tunneled broadcast packet to rebroadcast.
30 * Broadcast locally (if there are vifs in the vnet).
31 *
32 *
33 * Vnetd udp messages (varp):
34 *
35 * - local varp request:
36 * If know and vif is non-local, reply.
37 * If know and vif is local, do nothing (but announce will reset).
38 * If have entry saying is local and no-one answers - remove (? or rely on entry timeout).
39 * If don't know and there is no (quick) local reply, forward to peers.
40 *
41 * - remote varp request:
42 * If know, reply.
43 * If don't know, query locally (and queue request).
44 *
45 * - varp announce: remember and adjust vnet subscriptions.
46 * Forward to peers if a request is pending.
47 *
48 * Vnetd broadcast messages (tunneling):
49 *
50 * - etherip: forward to peers (on the right vnets)
51 *
52 * - esp: forward to peers (on the right vnets)
53 *
54 *
55 * For etherip can tell the vnet from the header (in clear).
56 * But for esp can't. So should use mcast to define? Or always some clear header?
57 *
58 * Make ssl on tcp connections optional.
59 *
60 * So far have been assuming esp for security.
61 * But could use vnetd to forward and use ssl on the connection.
62 * But has usual probs with efficiency.
63 * However, should 'just work' if the coa for the vif has been set
64 * to the vnetd. How? Vnetd configured to act as gateway for
65 * some peers? Then would rewrite varp announce to itself and forward
66 * traffic to peer.
67 *
68 * Simplify - make each vnetd have one peer?
69 * If need to link more subnets, add vnetds?
70 *
71 * Need requests table for each tcp conn (incoming).
72 * - entries we want to resolve (and fwd the answer).
73 *
74 * Need requests table for the udp socket.
75 * - entries we want to resolve (and return the answer).
76 *
77 * Need table of entries we know.
78 * - from caching local announce
79 * - from caching announce reply to forwarded request
80 *
81 * Problem with replying to requests from the cache - if the cache
82 * is out of date we reply with incorrect data. So if a VM migrates
83 * we will advertise the old location until it times out.
84 *
85 * So should probably not reply out of the cache at all - but always
86 * query for the answer. Could query direct to old location if
87 * entry is valid the first time, and broadcast if no reply in timeout.
88 * Causes delay if migrated - may as well broadcast.
89 *
90 * Need to watch out for query loops. If have 3 vnetds A,B,C and
91 * A gets a query, forwards to B and C. B forwards to C, which
92 * forwards to A, and on forever. So if have an entry that has been
93 * probed, do not forward it when get another query for it.
94 *
95 * @author Mike Wray <mike.wray@hpl.hp.com>
96 */
99 #include <stdlib.h>
100 #include <unistd.h>
101 #include <stdio.h>
102 #include <getopt.h>
103 #include <errno.h>
104 #include <sys/types.h>
105 #include <time.h>
106 #include <sys/socket.h>
107 #include <netinet/in.h>
108 #include <arpa/inet.h>
109 #include <string.h>
111 #include <signal.h>
112 #include <sys/wait.h>
113 #include <sys/select.h>
115 //#include </usr/include/linux/ip.h> // For struct iphdr;
116 #include <linux/ip.h> // For struct iphdr;
118 #include <linux/if_ether.h>
119 #include "if_etherip.h"
120 #include "if_varp.h"
122 #include "allocate.h"
124 #include "vnetd.h"
125 #include "file_stream.h"
126 #include "string_stream.h"
127 #include "socket_stream.h"
128 #include "sys_net.h"
130 #include "enum.h"
131 #include "sxpr.h"
133 #include "marshal.h"
134 #include "connection.h"
135 #include "select.h"
136 #include "timer.h"
137 #include "vcache.h"
139 int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, Conn **val);
141 #ifndef TRUE
142 #define TRUE 1
143 #endif
145 #ifndef FALSE
146 #define FALSE 0
147 #endif
/** Flags telling create_socket() and vnetd_raw_socket() how to set a socket up.
 * Every flag occupies its own bit, so flags may be or-ed together.
 */
enum {
    VSOCK_REUSE     = 1 << 0, /* set SO_REUSEADDR on the socket */
    VSOCK_BIND      = 1 << 1, /* bind() the socket to the address */
    VSOCK_CONNECT   = 1 << 2, /* connect() the socket to the address */
    VSOCK_BROADCAST = 1 << 3, /* set SO_BROADCAST on the socket */
    VSOCK_MULTICAST = 1 << 4, /* join the multicast group */
};
158 #define PROGRAM "vnetd"
159 #define VERSION "0.1"
161 #define MODULE_NAME PROGRAM
162 #define DEBUG
163 #undef DEBUG
164 #include "debug.h"
/*
 * Command-line options. Each option has three macros:
 *   OPT_x  short option character,
 *   KEY_x  long option name,
 *   DOC_x  help text printed after the option name.
 */

/* UDP port. */
#define OPT_PORT 'p'
#define KEY_PORT "port"
#define DOC_PORT "<port>\n\t" PROGRAM " UDP port (as a number or service name)"

/* Multicast address. */
#define OPT_ADDR 'm'
#define KEY_ADDR "mcaddr"
#define DOC_ADDR "<address>\n\t" PROGRAM " multicast address"

/* Peer to connect to. */
#define OPT_PEER 'r'
#define KEY_PEER "peer"
#define DOC_PEER "<peer>\n\t Peer " PROGRAM " to connect to (IP address or hostname)"

/* Configuration file. */
#define OPT_FILE 'f'
#define KEY_FILE "file"
#define DOC_FILE "<file>\n\t Configuration file to load"

/* Control port. */
#define OPT_CTRL 'c'
#define KEY_CTRL "control"
#define DOC_CTRL "<port>\n\t " PROGRAM " control port (as a number or service name)"

/* Help. */
#define OPT_HELP 'h'
#define KEY_HELP "help"
#define DOC_HELP "\n\tprint help"

/* Version. */
#define OPT_VERSION 'v'
#define KEY_VERSION "version"
#define DOC_VERSION "\n\tprint version"

/* Verbose output. */
#define OPT_VERBOSE 'V'
#define KEY_VERBOSE "verbose"
#define DOC_VERBOSE "\n\tverbose flag"
198 /** Print a usage message.
199 * Prints to stdout if err is zero, and exits with 0.
200 * Prints to stderr if err is non-zero, and exits with 1.
201 *
202 * @param err error code
203 */
204 static void usage(int err){
205 FILE *out = (err ? stderr : stdout);
207 fprintf(out, "Usage: %s [options]\n", PROGRAM);
208 fprintf(out, "-%c, --%s %s\n", OPT_ADDR, KEY_ADDR, DOC_ADDR);
209 fprintf(out, "-%c, --%s %s\n", OPT_PORT, KEY_PORT, DOC_PORT);
210 fprintf(out, "-%c, --%s %s\n", OPT_PEER, KEY_PEER, DOC_PEER);
211 fprintf(out, "-%c, --%s %s\n", OPT_VERBOSE, KEY_VERBOSE, DOC_VERBOSE);
212 fprintf(out, "-%c, --%s %s\n", OPT_VERSION, KEY_VERSION, DOC_VERSION);
213 fprintf(out, "-%c, --%s %s\n", OPT_HELP, KEY_HELP, DOC_HELP);
214 exit(err ? 1 : 0);
215 }
217 /** Short options. Options followed by ':' take an argument. */
218 static char *short_opts = (char[]){
219 OPT_ADDR, ':',
220 OPT_PORT, ':',
221 OPT_PEER, ':',
222 OPT_HELP,
223 OPT_VERSION,
224 OPT_VERBOSE,
225 0 };
227 /** Long options. */
228 static struct option const long_opts[] = {
229 { KEY_ADDR, required_argument, NULL, OPT_ADDR },
230 { KEY_PORT, required_argument, NULL, OPT_PORT },
231 { KEY_PEER, required_argument, NULL, OPT_PEER },
232 { KEY_HELP, no_argument, NULL, OPT_HELP },
233 { KEY_VERSION, no_argument, NULL, OPT_VERSION },
234 { KEY_VERBOSE, no_argument, NULL, OPT_VERBOSE },
235 { NULL, 0, NULL, 0 }
236 };
/** Look up this host's own address, so that broadcast traffic we sent
 * ourselves can be recognised and ignored.
 *
 * Only addr->sin_addr.s_addr is written; the other fields are left alone.
 *
 * @param addr where to store the address
 * @return 0 on success, the non-zero gethostname() result if the host name
 *         cannot be read, -ENOENT if the name cannot be resolved
 */
int get_self_addr(struct sockaddr_in *addr){
    char name[1024] = {};
    unsigned long ip;
    int rc;

    /* Pass one byte less than the buffer so the name stays NUL-terminated. */
    rc = gethostname(name, sizeof(name) - 1);
    if(rc){
        return rc;
    }
    /* NOTE(review): a zero result is treated as failure here, whereas main()
     * treats a non-zero result of the same function as failure - confirm
     * the return convention of get_host_address().
     */
    if(get_host_address(name, &ip) == 0){
        return -ENOENT;
    }
    addr->sin_addr.s_addr = ip;
    return 0;
}
261 /** Marshal a message.
262 *
263 * @param io destination
264 * @param msg message
265 * @return number of bytes written, or negative error code
266 */
267 int VnetMsg_marshal(IOStream *io, VnetMsg *msg){
268 int err = 0;
269 int hdr_n = sizeof(VnetMsgHdr);
271 err = marshal_uint16(io, msg->hdr.id);
272 if(err < 0) goto exit;
273 err = marshal_uint16(io, msg->hdr.opcode);
274 if(err < 0) goto exit;
275 switch(msg->hdr.id){
276 case VNET_VARP_ID:
277 err = marshal_bytes(io, ((char*)msg) + hdr_n, sizeof(VarpHdr) - hdr_n);
278 break;
279 case VNET_FWD_ID:
280 err = marshal_uint16(io, msg->fwd.protocol);
281 if(err < 0) goto exit;
282 err = marshal_uint16(io, msg->fwd.len);
283 if(err < 0) goto exit;
284 err = marshal_bytes(io, msg->fwd.data, msg->fwd.len);
285 break;
286 default:
287 err = -EINVAL;
288 break;
289 }
290 exit:
291 return err;
292 }
294 /** Unmarshal a message.
295 *
296 * @param io source
297 * @param msg message to unmarshal into
298 * @return number of bytes read, or negative error code
299 */
300 int VnetMsg_unmarshal(IOStream *io, VnetMsg *msg){
301 int err = 0;
302 int hdr_n = sizeof(VnetMsgHdr);
304 dprintf("> id\n");
305 err = unmarshal_uint16(io, &msg->hdr.id);
306 if(err < 0) goto exit;
307 dprintf("> opcode\n");
308 err = unmarshal_uint16(io, &msg->hdr.opcode);
309 if(err < 0) goto exit;
310 switch(msg->hdr.id){
311 case VNET_VARP_ID:
312 msg->hdr.opcode = htons(msg->hdr.opcode);
313 dprintf("> varp hdr_n=%d varphdr=%d\n", hdr_n, sizeof(VarpHdr));
314 err = unmarshal_bytes(io, ((char*)msg) + hdr_n, sizeof(VarpHdr) - hdr_n);
315 break;
316 case VNET_FWD_ID:
317 dprintf("> forward\n");
318 err = unmarshal_uint16(io, &msg->fwd.protocol);
319 if(err < 0) goto exit;
320 dprintf("> forward len\n");
321 err = unmarshal_uint16(io, &msg->fwd.len);
322 if(err < 0) goto exit;
323 dprintf("> forward bytes\n");
324 err = unmarshal_bytes(io, msg->fwd.data, msg->fwd.len);
325 break;
326 default:
327 wprintf("> Invalid id %d\n", msg->hdr.id);
328 err = -EINVAL;
329 break;
330 }
331 exit:
332 dprintf("< err=%d \n", err);
333 return err;
334 }
336 Vnetd _vnetd = {};
337 Vnetd *vnetd = &_vnetd;
/** Counter for timer alarms.
 * Incremented by the SIGALRM handler and tested/reset by the select loop.
 * Declared volatile sig_atomic_t because that is the object type the C
 * standard allows a signal handler to write.
 */
static volatile sig_atomic_t timer_alarms = 0;
343 /** Set vnetd defaults.
344 *
345 * @param vnetd vnetd
346 */
347 void vnetd_set_defaults(Vnetd *vnetd){
348 *vnetd = (Vnetd){};
349 vnetd->port = htons(VNETD_PORT);
350 vnetd->peer_port = vnetd->port; //htons(VNETD_PEER_PORT);
351 vnetd->verbose = FALSE;
352 vnetd->peers = ONULL;
353 vnetd->mcast_addr.sin_addr.s_addr = VARP_MCAST_ADDR;
354 vnetd->mcast_addr.sin_port = vnetd->port;
355 }
357 uint32_t vnetd_mcast_addr(Vnetd *vnetd){
358 return vnetd->mcast_addr.sin_addr.s_addr;
359 }
361 uint16_t vnetd_mcast_port(Vnetd *vnetd){
362 return vnetd->mcast_addr.sin_port;
363 }
365 /** Add a connection to a peer.
366 *
367 * @param vnetd vnetd
368 * @param conn connection
369 */
370 void connections_add(Vnetd *vnetd, Conn *conn){
371 vnetd->connections = ConnList_add(conn, vnetd->connections);
372 }
374 /** Delete a connection to a peer.
375 *
376 * @param vnetd vnetd
377 * @param conn connection
378 */
379 void connections_del(Vnetd *vnetd, Conn *conn){
380 ConnList *prev, *curr, *next;
381 for(prev = NULL, curr = vnetd->connections; curr; prev = curr, curr = next){
382 next = curr->next;
383 if(curr->conn == conn){
384 if(prev){
385 prev->next = curr->next;
386 } else {
387 vnetd->connections = curr->next;
388 }
389 }
390 }
391 }
393 /** Close all connections to peers.
394 *
395 * @param vnetd vnetd
396 */
397 void connections_close_all(Vnetd *vnetd){
398 ConnList *l;
399 for(l = vnetd->connections; l; l = l->next){
400 Conn_close(l->conn);
401 }
402 vnetd->connections = NULL;
403 }
405 /** Add peer connections to a select set.
406 *
407 * @param vnetd vnetd
408 * @param set select set
409 */
410 void connections_select(Vnetd *vnetd, SelectSet *set){
411 ConnList *l;
412 for(l = vnetd->connections; l; l = l->next){
413 SelectSet_add_read(set, l->conn->sock);
414 }
415 }
417 /** Handle peer connections according to a select set.
418 *
419 * @param vnetd vnetd
420 * @param set indicates ready connections
421 */
422 void connections_handle(Vnetd *vnetd, SelectSet *set){
423 ConnList *prev, *curr, *next;
424 Conn *conn;
425 for(prev = NULL, curr = vnetd->connections; curr; prev = curr, curr = next){
426 next = curr->next;
427 conn = curr->conn;
428 if(FD_ISSET(conn->sock, &set->rd)){
429 int conn_err;
430 conn_err = Conn_handle(conn);
431 if(conn_err){
432 if(prev){
433 prev->next = curr->next;
434 } else {
435 vnetd->connections = curr->next;
436 }
437 }
438 }
439 }
440 }
442 /** Forward a message from a peer onto the local subnet.
443 *
444 * @param vnetd vnetd
445 * @param vmsg message
446 * @return 0 on success, error code otherwise
447 */
448 int vnetd_forward_local(Vnetd *vnetd, VnetMsg *vmsg){
449 int err = 0;
450 int sock = 0;
451 struct sockaddr_in addr_in;
452 struct sockaddr *addr = (struct sockaddr *)&addr_in;
453 socklen_t addr_n = sizeof(addr_in);
455 dprintf(">\n");
456 switch(vmsg->fwd.protocol){
457 case IPPROTO_ESP:
458 dprintf("> ESP\n");
459 sock = vnetd->esp_sock; break;
460 case IPPROTO_ETHERIP:
461 dprintf("> Etherip\n");
462 sock = vnetd->etherip_sock; break;
463 default:
464 err = -EINVAL;
465 goto exit;
466 }
467 addr_in.sin_family = AF_INET;
468 addr_in.sin_addr = vnetd->mcast_addr.sin_addr;
469 addr_in.sin_port = htons(vmsg->fwd.protocol);
470 dprintf("> send dst=%s protocol=%d len=%d\n",
471 inet_ntoa(addr_in.sin_addr), vmsg->fwd.protocol, vmsg->fwd.len);
472 err = sendto(sock, vmsg->fwd.data, vmsg->fwd.len, 0, addr, addr_n);
473 exit:
474 dprintf("< err=%d\n", err);
475 return err;
476 }
478 /** Forward a message to a peer.
479 *
480 * @param conn peer connection
481 * @param protocol message protocol
482 * @param data message data
483 * @param data_n message size
484 * @return 0 on success, error code otherwise
485 */
486 int vnetd_forward_peer(Conn *conn, int protocol, void *data, int data_n){
487 int err = 0;
488 IOStream _io, *io = &_io;
489 StringData sdata;
490 char buf[1600];
492 dprintf("> addr=%s protocol=%d n=%d\n",
493 inet_ntoa(conn->addr.sin_addr), protocol, data_n);
494 string_stream_init(io, &sdata, buf, sizeof(buf));
495 dprintf("> 10\n");
496 err = marshal_uint16(io, VNET_FWD_ID);
497 if(err < 0) goto exit;
498 dprintf("> 20\n");
499 err = marshal_uint16(io, 0);
500 if(err < 0) goto exit;
501 dprintf("> 30\n");
502 err = marshal_uint16(io, protocol);
503 if(err < 0) goto exit;
504 dprintf("> 40\n");
505 err = marshal_uint16(io, data_n);
506 if(err < 0) goto exit;
507 dprintf("> 50\n");
508 err = marshal_bytes(io, data, data_n);
509 if(err < 0) goto exit;
510 dprintf("> 60 bytes=%d\n", IOStream_get_written(io));
511 err = IOStream_write(conn->out, buf, IOStream_get_written(io));
512 IOStream_flush(conn->out);
513 exit:
514 if(err < 0) perror(__FUNCTION__);
515 dprintf("< err=%d\n", err);
516 return err;
517 }
519 /** Forward a message to all peers.
520 *
521 * @param vnetd vnetd
522 * @param protocol message protocol
523 * @param data message data
524 * @param data_n message size
525 * @return 0 on success, error code otherwise
526 */
527 int vnetd_forward_peers(Vnetd *vnetd, int protocol, void *data, int data_n){
528 int err = 0;
529 ConnList *curr, *next;
531 dprintf(">\n");
532 for(curr = vnetd->connections; curr; curr = next){
533 next = curr->next;
534 vnetd_forward_peer(curr->conn, protocol, data, data_n);
535 }
536 dprintf("< err=%d\n", err);
537 return err;
538 }
540 /** Handler for a peer connection.
541 * Reads a VnetMsg from the connection and handles it.
542 *
543 * @param conn peer connection
544 * @return 0 on success, error code otherwise
545 */
546 int conn_handle_fn(Conn *conn){
547 int err = 0;
548 VnetMsg *vmsg = ALLOCATE(VnetMsg);
549 IPMessage *msg = NULL;
551 dprintf("> addr=%s port=%u\n",
552 inet_ntoa(conn->addr.sin_addr),
553 ntohs(conn->addr.sin_port));
554 err = VnetMsg_unmarshal(conn->in, vmsg);
555 if(err < 0){
556 wprintf("> Unmarshal error %d\n", err);
557 goto exit;
558 }
559 switch(vmsg->hdr.id){
560 case VNET_VARP_ID:
561 dprintf("> Got varp message\n");
562 msg = ALLOCATE(IPMessage);
563 msg->conn = conn;
564 msg->saddr = conn->addr;
565 msg->data = vmsg;
566 err = vcache_handle_message(msg, 0);
567 err = 0;
568 break;
569 case VNET_FWD_ID:
570 dprintf("> Got forward message\n");
571 err = vnetd_forward_local(vnetd, vmsg);
572 err = 0;
573 break;
574 default:
575 wprintf("> Invalid id=%d\n", vmsg->hdr.id);
576 err = -EINVAL;
577 break;
578 }
579 exit:
580 dprintf("< err=%d\n", err);
581 return err;
582 }
584 /** Accept an incoming tcp connection from a peer vnetd.
585 *
586 * @param sock tcp socket
587 * @return 0 on success, error code otherwise
588 */
589 int vnetd_accept(Vnetd *vnetd, Conn *conn){
590 Conn *new_conn = NULL;
591 struct sockaddr_in peer_in;
592 struct sockaddr *peer = (struct sockaddr *)&peer_in;
593 socklen_t peer_n = sizeof(peer_in);
594 int peersock;
595 int err = 0;
597 //dprintf(">\n");
598 new_conn = Conn_new(conn_handle_fn, vnetd);
599 //dprintf("> accept...\n");
600 peersock = accept(conn->sock, peer, &peer_n);
601 //dprintf("> accept=%d\n", peersock);
602 if(peersock < 0){
603 perror("accept");
604 err = -errno;
605 goto exit;
606 }
607 iprintf("> Accepted connection from %s:%d\n",
608 inet_ntoa(peer_in.sin_addr), htons(peer_in.sin_port));
609 err = Conn_init(new_conn, peersock, SOCK_STREAM, peer_in);
610 if(err) goto exit;
611 connections_add(vnetd, new_conn);
612 exit:
613 if(err){
614 Conn_close(new_conn);
615 }
616 if(err < 0) wprintf("< err=%d\n", err);
617 return err;
618 }
620 /** Connect to a peer vnetd.
621 *
622 * @param vnetd vnetd
623 * @param addr address
624 * @param port port
625 * @return 0 on success, error code otherwise
626 */
627 int vnetd_connect(Vnetd *vnetd, struct in_addr addr, uint16_t port){
628 Conn *conn = NULL;
629 int err = 0;
631 //dprintf(">\n");
632 conn = Conn_new(conn_handle_fn, vnetd);
633 err = Conn_connect(conn, SOCK_STREAM, addr, port);
634 if(err) goto exit;
635 connections_add(vnetd, conn);
636 exit:
637 if(err){
638 Conn_close(conn);
639 }
640 //dprintf(" < err=%d\n", err);
641 return err;
642 }
644 /** Handle a message on the udp socket.
645 * Expecting to see VARP messages only.
646 *
647 * @param sock udp socket
648 * @return 0 on success, error code otherwise
649 */
650 int vnetd_handle_udp(Vnetd *vnetd, Conn *conn){
651 int err = 0, rcv = 0;
652 struct sockaddr_in self_in;
653 struct sockaddr_in peer_in;
654 struct sockaddr *peer = (struct sockaddr *)&peer_in;
655 socklen_t peer_n = sizeof(peer_in);
656 VnetMsg *vmsg = NULL;
657 void *data;
658 int data_n;
659 int flags = 0;
660 IPMessage *msg = NULL;
662 //dprintf(">\n");
663 self_in = vnetd->addr;
664 vmsg = ALLOCATE(VnetMsg);
665 data = &vmsg->varp.varph;
666 data_n = sizeof(VarpHdr);
667 rcv = recvfrom(conn->sock, data, data_n, flags, peer, &peer_n);
668 if(rcv < 0){
669 err = rcv;
670 goto exit;
671 }
672 dprintf("> Received %d bytes from %s:%d\n",
673 rcv, inet_ntoa(peer_in.sin_addr), htons(peer_in.sin_port));
674 if(rcv != data_n){
675 err = -EINVAL;
676 goto exit;
677 }
678 if(peer_in.sin_addr.s_addr == self_in.sin_addr.s_addr){
679 //dprintf("> Ignoring message from self.\n");
680 goto exit;
681 }
682 msg = ALLOCATE(IPMessage);
683 msg->conn = conn;
684 msg->saddr = peer_in;
685 msg->data = vmsg;
687 err = vcache_handle_message(msg, 1);
688 exit:
689 //dprintf("< err=%d\n", err);
690 return err;
691 }
693 /** Handle a message on a raw socket.
694 * Only deals with etherip and esp.
695 * Forwards messages to peers.
696 *
697 * @param vnetd vnetd
698 * @param sock socket
699 * @param protocol protocol
700 * @return 0 on success, error code otherwise
701 */
702 int vnetd_handle_protocol(Vnetd *vnetd, int sock, int protocol){
703 int err = 0, rcv = 0;
704 struct sockaddr_in self_in;
705 struct sockaddr_in peer_in;
706 struct sockaddr *peer = (struct sockaddr *)&peer_in;
707 socklen_t peer_n = sizeof(peer_in);
708 uint8_t buf[VNET_FWD_MAX];
709 int buf_n = sizeof(buf);
710 char *data, *end;
711 int flags = 0;
712 struct iphdr *iph = NULL;
714 //dprintf(">\n");
715 self_in = vnetd->addr;
716 rcv = recvfrom(sock, buf, buf_n, flags, peer, &peer_n);
717 if(rcv < 0){
718 err = rcv;
719 goto exit;
720 }
721 dprintf("> Received %d bytes from %s protocol=%d\n",
722 rcv, inet_ntoa(peer_in.sin_addr), protocol);
723 if(rcv < sizeof(struct iphdr)){
724 wprintf("> Message too short for IP header\n");
725 err = -EINVAL;
726 goto exit;
727 }
728 if(peer_in.sin_addr.s_addr == self_in.sin_addr.s_addr){
729 dprintf("> Ignoring message from self.\n");
730 goto exit;
731 }
732 data = buf;
733 end = buf + rcv;
734 iph = (void*)data;
735 data += (iph->ihl << 2);
736 vnetd_forward_peers(vnetd, protocol, data, end - data);
737 exit:
738 //dprintf("< err=%d\n", err);
739 return err;
740 }
742 /** Socket select loop.
743 * Accepts connections on the tcp socket and handles
744 * messages on the other sockets.
745 *
746 * @return 0 on success, error code otherwise
747 */
748 int vnetd_select(Vnetd *vnetd){
749 int err = 0;
750 SelectSet set = {};
751 while(1){
752 SelectSet_zero(&set);
753 SelectSet_add_read(&set, vnetd->udp_conn->sock);
754 SelectSet_add_read(&set, vnetd->bcast_conn->sock);
755 SelectSet_add_read(&set, vnetd->etherip_sock);
756 SelectSet_add_read(&set, vnetd->esp_sock);
757 SelectSet_add_read(&set, vnetd->listen_conn->sock);
758 connections_select(vnetd, &set);
759 err = SelectSet_select(&set, NULL);
760 if(err == 0) continue;
761 if(err < 0){
762 if(errno == EINTR){
763 if(timer_alarms){
764 timer_alarms = 0;
765 process_timers();
766 }
767 continue;
768 }
769 perror("select");
770 goto exit;
771 }
772 if(FD_ISSET(vnetd->udp_conn->sock, &set.rd)){
773 vnetd_handle_udp(vnetd, vnetd->udp_conn);
774 }
775 if(FD_ISSET(vnetd->bcast_conn->sock, &set.rd)){
776 vnetd_handle_udp(vnetd, vnetd->bcast_conn);
777 }
778 if(FD_ISSET(vnetd->etherip_sock, &set.rd)){
779 vnetd_handle_protocol(vnetd, vnetd->etherip_sock, IPPROTO_ETHERIP);
780 }
781 if(FD_ISSET(vnetd->esp_sock, &set.rd)){
782 vnetd_handle_protocol(vnetd, vnetd->esp_sock, IPPROTO_ESP);
783 }
784 connections_handle(vnetd, &set);
785 if(FD_ISSET(vnetd->listen_conn->sock, &set.rd)){
786 vnetd_accept(vnetd, vnetd->listen_conn);
787 }
788 }
789 exit:
790 return err;
791 }
/** Set or clear the SO_REUSEADDR option on a socket.
 * A failure is also reported on stderr with perror().
 *
 * @param sock socket
 * @param reuse option value
 * @return 0 on success, -errno otherwise
 */
int setsock_reuse(int sock, int reuse){
    int rc = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse));

    if(rc >= 0){
        return rc;
    }
    rc = -errno;
    perror("setsockopt SO_REUSEADDR");
    return rc;
}
/** Set or clear the SO_BROADCAST option on a socket.
 * A failure is also reported on stderr with perror().
 *
 * @param sock socket
 * @param bcast option value
 * @return 0 on success, -errno otherwise
 */
int setsock_broadcast(int sock, int bcast){
    int rc = setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &bcast, sizeof(bcast));

    if(rc >= 0){
        return rc;
    }
    rc = -errno;
    perror("setsockopt SO_BROADCAST");
    return rc;
}
817 /** Join a socket to a multicast group.
818 */
819 int setsock_multicast(int sock, uint32_t saddr){
820 int err = 0;
821 struct ip_mreqn mreq = {};
822 int mloop = 0;
823 // See 'man 7 ip' for these options.
824 mreq.imr_multiaddr.s_addr = saddr; // IP multicast address.
825 mreq.imr_address = vnetd->addr.sin_addr; // Interface IP address.
826 mreq.imr_ifindex = 0; // Interface index (0 means any).
827 err = setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &mloop, sizeof(mloop));
828 if(err < 0){
829 err = -errno;
830 perror("setsockopt IP_MULTICAST_LOOP");
831 goto exit;
832 }
833 err = setsockopt(sock, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
834 if(err < 0){
835 err = -errno;
836 perror("setsockopt IP_ADD_MEMBERSHIP");
837 goto exit;
838 }
839 exit:
840 return err;
841 }
/** Set the multicast time-to-live of a socket (the default is 1).
 * A failure is also reported on stderr with perror().
 *
 * @param sock socket
 * @param ttl time-to-live value
 * @return 0 on success, -errno otherwise
 */
int setsock_multicast_ttl(int sock, uint8_t ttl){
    int rc = setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl));

    if(rc >= 0){
        return rc;
    }
    rc = -errno;
    perror("setsockopt IP_MULTICAST_TTL");
    return rc;
}
856 char * socket_flags(int flags){
857 static char s[6];
858 int i = 0;
859 s[i++] = (flags & VSOCK_CONNECT ? 'c' : '-');
860 s[i++] = (flags & VSOCK_BIND ? 'b' : '-');
861 s[i++] = (flags & VSOCK_REUSE ? 'r' : '-');
862 s[i++] = (flags & VSOCK_BROADCAST ? 'B' : '-');
863 s[i++] = (flags & VSOCK_MULTICAST ? 'M' : '-');
864 s[i++] = '\0';
865 return s;
866 }
868 /** Create a socket.
869 * The flags can include VSOCK_REUSE, VSOCK_BROADCAST, VSOCK_CONNECT.
870 *
871 * @param socktype socket type
872 * @param saddr address
873 * @param port port
874 * @param flags flags
875 * @param val return value for the socket connection
876 * @return 0 on success, error code otherwise
877 */
878 int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, Conn **val){
879 int err = 0;
880 int sock = 0;
881 struct sockaddr_in addr_in;
882 struct sockaddr *addr = (struct sockaddr *)&addr_in;
883 socklen_t addr_n = sizeof(addr_in);
884 Conn *conn = NULL;
885 int reuse, bcast;
887 //dprintf(">\n");
888 reuse = (flags & VSOCK_REUSE);
889 bcast = (flags & VSOCK_BROADCAST);
890 addr_in.sin_family = AF_INET;
891 addr_in.sin_addr.s_addr = saddr;
892 addr_in.sin_port = port;
893 dprintf("> flags=%s addr=%s port=%d\n", socket_flags(flags),
894 inet_ntoa(addr_in.sin_addr), ntohs(addr_in.sin_port));
896 sock = socket(AF_INET, socktype, 0);
897 if(sock < 0){
898 err = -errno;
899 goto exit;
900 }
901 if(reuse){
902 err = setsock_reuse(sock, reuse);
903 if(err < 0) goto exit;
904 }
905 if(bcast){
906 err = setsock_broadcast(sock, bcast);
907 if(err < 0) goto exit;
908 }
909 if(flags & VSOCK_MULTICAST){
910 err = setsock_multicast(sock, saddr);
911 if(err < 0) goto exit;
912 }
913 if(flags & VSOCK_CONNECT){
914 err = connect(sock, addr, addr_n);
915 if(err < 0){
916 err = -errno;
917 perror("connect");
918 goto exit;
919 }
920 }
921 if(flags & VSOCK_BIND){
922 err = bind(sock, addr, addr_n);
923 if(err < 0){
924 err = -errno;
925 perror("bind");
926 goto exit;
927 }
928 }
929 conn = Conn_new(NULL, NULL);
930 Conn_init(conn, sock, socktype, addr_in);
931 {
932 struct sockaddr_in self = {};
933 socklen_t self_n;
934 getsockname(conn->sock, (struct sockaddr *)&self, &self_n);
935 dprintf("> sockname sock=%d addr=%s port=%d\n",
936 conn->sock, inet_ntoa(self.sin_addr), ntohs(self.sin_port));
937 }
938 exit:
939 *val = (err ? NULL : conn);
940 //dprintf("< err=%d\n", err);
941 return err;
942 }
944 /** Create the tcp listen socket.
945 *
946 * @param vnetd program arguments
947 * @param val return value for the socket
948 * @return 0 on success, error code otherwise
949 */
950 int vnetd_listen_conn(Vnetd *vnetd, Conn **val){
951 int err = 0;
952 int flags = VSOCK_BIND | VSOCK_REUSE;
953 //dprintf(">\n");
954 err = create_socket(SOCK_STREAM, INADDR_ANY, vnetd->peer_port, flags, val);
955 if(err) goto exit;
956 err = listen((*val)->sock, 5);
957 if(err < 0){
958 err = -errno;
959 perror("listen");
960 goto exit;
961 }
962 exit:
963 if(err && *val){
964 Conn_close(*val);
965 *val = NULL;
966 }
967 //dprintf("< err=%d\n", err);
968 return err;
969 }
971 /** Create the udp socket.
972 *
973 * @param vnetd program arguments
974 * @param val return value for the socket
975 * @return 0 on success, error code otherwise
976 */
977 int vnetd_udp_conn(Vnetd *vnetd, Conn **val){
978 int err = 0;
979 uint32_t addr = INADDR_ANY;
980 uint16_t port = vnetd->port;
981 int flags = VSOCK_BIND | VSOCK_REUSE;
982 err = create_socket(SOCK_DGRAM, addr, port, flags, val);
983 return err;
984 }
986 /** Create the broadcast socket.
987 *
988 * @param vnetd program arguments
989 * @param val return value for the socket
990 * @return 0 on success, error code otherwise
991 */
992 int vnetd_broadcast_conn(Vnetd *vnetd, Conn **val){
993 int err = 0;
994 uint32_t addr = vnetd_mcast_addr(vnetd);
995 uint16_t port = vnetd_mcast_port(vnetd);
996 int flags = VSOCK_REUSE;
997 int multicast = IN_MULTICAST(ntohl(addr));
999 flags |= VSOCK_MULTICAST;
1000 flags |= VSOCK_BROADCAST;
1002 err = create_socket(SOCK_DGRAM, addr, port, flags, val);
1003 if(err < 0) goto exit;
1004 if(multicast){
1005 err = setsock_multicast_ttl((*val)->sock, 1);
1006 if(err < 0) goto exit;
1008 if(0){
1009 struct sockaddr * addr = (struct sockaddr *)&vnetd->addr;
1010 socklen_t addr_n = sizeof(vnetd->addr);
1011 dprintf("> sock=%d bind addr=%s:%d\n",
1012 (*val)->sock, inet_ntoa(vnetd->addr.sin_addr), ntohs(vnetd->addr.sin_port));
1013 err = bind((*val)->sock, addr, addr_n);
1014 if(err < 0){
1015 err = -errno;
1016 perror("bind");
1017 goto exit;
1020 if(0){
1021 struct sockaddr_in self = {};
1022 socklen_t self_n;
1023 getsockname((*val)->sock, (struct sockaddr *)&self, &self_n);
1024 dprintf("> sockname sock=%d addr=%s port=%d\n",
1025 (*val)->sock, inet_ntoa(self.sin_addr), ntohs(self.sin_port));
1027 exit:
1028 return err;
1031 /** Type for signal handling functions. */
1032 typedef void SignalAction(int code, siginfo_t *info, void *data);
1034 /** Handle SIGCHLD by getting child exit status.
1035 * This prevents child processes being defunct.
1037 * @param code signal code
1038 * @param info signal info
1039 * @param data
1040 */
1041 static void sigaction_SIGCHLD(int code, siginfo_t *info, void *data){
1042 int status;
1043 pid_t pid;
1044 pid = wait(&status);
1045 dprintf("> child pid=%d status=%d\n", pid, status);
1048 /** Handle SIGPIPE.
1050 * @param code signal code
1051 * @param info signal info
1052 * @param data
1053 */
1054 static void sigaction_SIGPIPE(int code, siginfo_t *info, void *data){
1055 dprintf("> SIGPIPE\n");
1058 /** Handle SIGALRM.
1060 * @param code signal code
1061 * @param info signal info
1062 * @param data
1063 */
1064 static void sigaction_SIGALRM(int code, siginfo_t *info, void *data){
1065 //dprintf("> SIGALRM\n");
1066 timer_alarms++;
1069 /** Install a handler for a signal.
1071 * @param signum signal
1072 * @param action handler
1073 * @return 0 on success, error code otherwise
1074 */
1075 static int catch_signal(int signum, SignalAction *action){
1076 int err = 0;
1077 struct sigaction sig = {};
1078 sig.sa_sigaction = action;
1079 sig.sa_flags = SA_SIGINFO;
1080 err = sigaction(signum, &sig, NULL);
1081 if(err){
1082 perror("sigaction");
1084 return err;
1087 /** Create a raw socket.
1089 * @param protocol protocol
1090 * @param flags flags
1091 * @param sock return value for the socket
1092 */
1093 int vnetd_raw_socket(int protocol, int flags, uint32_t mcaddr, int *sock){
1094 int err;
1095 int bcast = (flags & VSOCK_BROADCAST);
1096 //dprintf("> protocol=%d\n", protocol);
1097 err = *sock = socket(AF_INET, SOCK_RAW, protocol);
1098 if(err < 0){
1099 err = -errno;
1100 perror("socket");
1101 goto exit;
1103 if(bcast){
1104 err = setsock_broadcast(*sock, bcast);
1105 if(err < 0) goto exit;
1107 if(flags & VSOCK_MULTICAST){
1108 err = setsock_multicast(*sock, mcaddr);
1109 if(err < 0) goto exit;
1111 exit:
1112 //dprintf("< err=%d\n", err);
1113 return err;
1116 /** Connect to peer vnetds.
1118 * @param vnetd vnetd
1119 * @return 0 on success, error code otherwise
1120 */
1121 int vnetd_peers(Vnetd *vnetd){
1122 int err =0;
1123 Sxpr x, l;
1124 struct in_addr addr = {};
1125 for(l = vnetd->peers; CONSP(l); l = CDR(l)){
1126 x = CAR(l);
1127 addr.s_addr = OBJ_INT(x);
1128 vnetd_connect(vnetd, addr, vnetd->peer_port);
1130 return err;
1133 /** Vnet daemon main program.
1135 * @param vnetd program arguments
1136 * @return 0 on success, error code otherwise
1137 */
1138 int vnetd_main(Vnetd *vnetd){
1139 int err = 0;
1141 //dprintf(">\n");
1142 err = get_self_addr(&vnetd->addr);
1143 vnetd->addr.sin_port = vnetd->port;
1144 iprintf("> VNETD\n");
1145 iprintf("> addr=%s port=%u\n",
1146 inet_ntoa(vnetd->addr.sin_addr), htons(vnetd->port));
1147 iprintf("> mcaddr=%s port=%u\n",
1148 inet_ntoa(vnetd->mcast_addr.sin_addr), htons(vnetd->port));
1149 iprintf("> peers port=%u ", htons(vnetd->peer_port));
1150 objprint(iostdout, vnetd->peers, 0); printf("\n");
1152 err = vcache_init();
1153 err = vnetd_peers(vnetd);
1155 catch_signal(SIGCHLD,sigaction_SIGCHLD);
1156 catch_signal(SIGPIPE,sigaction_SIGPIPE);
1157 catch_signal(SIGALRM,sigaction_SIGALRM);
1158 err = vnetd_listen_conn(vnetd, &vnetd->listen_conn);
1159 if(err < 0) goto exit;
1160 err = vnetd_udp_conn(vnetd, &vnetd->udp_conn);
1161 if(err < 0) goto exit;
1162 err = vnetd_broadcast_conn(vnetd, &vnetd->bcast_conn);
1163 if(err < 0) goto exit;
1165 int flags = VSOCK_BROADCAST | VSOCK_MULTICAST;
1166 uint32_t mcaddr = vnetd->mcast_addr.sin_addr.s_addr;
1168 err = vnetd_raw_socket(IPPROTO_ETHERIP, flags, mcaddr, &vnetd->etherip_sock);
1169 if(err < 0) goto exit;
1170 err = vnetd_raw_socket(IPPROTO_ESP, flags, mcaddr, &vnetd->esp_sock);
1171 if(err < 0) goto exit;
1173 err = vnetd_select(vnetd);
1174 exit:
1175 Conn_close(vnetd->listen_conn);
1176 Conn_close(vnetd->udp_conn);
1177 Conn_close(vnetd->bcast_conn);
1178 connections_close_all(vnetd);
1179 close(vnetd->etherip_sock);
1180 close(vnetd->esp_sock);
1181 //dprintf("< err=%d\n", err);
1182 return err;
1185 /** Parse command-line arguments and call the vnetd main program.
1187 * @param arg argument count
1188 * @param argv arguments
1189 * @return 0 on success, 1 otherwise
1190 */
1191 extern int main(int argc, char *argv[]){
1192 int err = 0;
1193 int key = 0;
1194 int long_index = 0;
1196 vnetd_set_defaults(vnetd);
1197 while(1){
1198 key = getopt_long(argc, argv, short_opts, long_opts, &long_index);
1199 if(key == -1) break;
1200 switch(key){
1201 case OPT_ADDR:{
1202 unsigned long addr;
1203 err = get_host_address(optarg, &addr);
1204 if(err) goto exit;
1205 vnetd->mcast_addr.sin_addr.s_addr = addr;
1206 break; }
1207 case OPT_PORT:
1208 err = convert_service_to_port(optarg, &vnetd->port);
1209 if(err) goto exit;
1210 break;
1211 case OPT_PEER:{
1212 unsigned long addr;
1213 err = get_host_address(optarg, &addr);
1214 if(err) goto exit;
1215 //cons_push(&vnetd->peers, mkaddress(addr));
1216 cons_push(&vnetd->peers, mkint(addr));
1217 break; }
1218 case OPT_HELP:
1219 usage(0);
1220 break;
1221 case OPT_VERBOSE:
1222 vnetd->verbose = TRUE;
1223 break;
1224 case OPT_VERSION:
1225 iprintf("> %s %s\n", PROGRAM, VERSION);
1226 exit(0);
1227 break;
1228 default:
1229 usage(EINVAL);
1230 break;
1233 err = vnetd_main(vnetd);
1234 exit:
1235 if(err && key > 0){
1236 eprintf("> Error in arg %c\n", key);
1238 return (err ? 1 : 0);