ia64/xen-unstable

view tools/vnet/vnet-module/varp_socket.c @ 8740:3d7ea7972b39

Update patches for linux 2.6.15.

Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Thu Feb 02 17:16:00 2006 +0000 (2006-02-02)
parents 06d84bf87159
children 71b0f00f6344
line source
1 /*
2 * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; either version 2 of the License, or (at your
7 * option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free software Foundation, Inc.,
16 * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19 #include <linux/kernel.h>
20 #include <linux/types.h>
21 #include <linux/version.h>
23 #include <asm/uaccess.h>
24 #include <linux/net.h>
25 #include <linux/in.h>
26 #include <linux/sched.h>
27 #include <linux/file.h>
28 #include <linux/version.h>
29 #include <linux/smp_lock.h>
30 #include <net/sock.h>
32 #include <if_varp.h>
33 #include <varp.h>
35 /* Get macros needed to define system calls as functions in the kernel. */
36 #define __KERNEL_SYSCALLS__
37 static int errno;
38 #include <linux/unistd.h>
40 #define MODULE_NAME "VARP"
41 #define DEBUG 1
42 #undef DEBUG
43 #include "debug.h"
45 // Compensate for struct sock fields having 'sk_' added
46 // to them in 2.6.
47 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
49 #define SK_RECEIVE_QUEUE sk_receive_queue
50 #define SK_SLEEP sk_sleep
52 #else
54 #define SK_RECEIVE_QUEUE receive_queue
55 #define SK_SLEEP sleep
57 #endif
59 /** @file
60 * Support for the VARP udp sockets.
61 */
63 static inline mm_segment_t change_fs(mm_segment_t fs){
64 mm_segment_t oldfs = get_fs();
65 set_fs(fs);
66 return oldfs;
67 }
69 /* Replicate the user-space socket API.
70 * The parts we need anyway.
71 */
73 /* Define the socketcall() syscall.
74 * Multiplexes all the socket-related calls.
75 *
76 * @param call socket call id
77 * @param args arguments (upto 6)
78 * @return call-dependent value
79 */
80 static inline _syscall2(int, socketcall,
81 int, call,
82 unsigned long *, args)
84 int socket(int family, int type, int protocol){
85 unsigned long args[6];
87 args[0] = (unsigned long)family;
88 args[1] = (unsigned long)type;
89 args[2] = (unsigned long)protocol;
90 return socketcall(SYS_SOCKET, args);
91 }
93 int bind(int fd, struct sockaddr *umyaddr, int addrlen){
94 unsigned long args[6];
96 args[0] = (unsigned long)fd;
97 args[1] = (unsigned long)umyaddr;
98 args[2] = (unsigned long)addrlen;
99 return socketcall(SYS_BIND, args);
100 }
102 int connect(int fd, struct sockaddr *uservaddr, int addrlen){
103 unsigned long args[6];
105 args[0] = (unsigned long)fd;
106 args[1] = (unsigned long)uservaddr;
107 args[2] = (unsigned long)addrlen;
108 return socketcall(SYS_CONNECT, args);
109 }
111 int sendto(int fd, void * buff, size_t len,
112 unsigned flags, struct sockaddr *addr,
113 int addr_len){
114 unsigned long args[6];
116 args[0] = (unsigned long)fd;
117 args[1] = (unsigned long)buff;
118 args[2] = (unsigned long)len;
119 args[3] = (unsigned long)flags;
120 args[4] = (unsigned long)addr;
121 args[5] = (unsigned long)addr_len;
122 return socketcall(SYS_SENDTO, args);
123 }
125 int recvfrom(int fd, void * ubuf, size_t size,
126 unsigned flags, struct sockaddr *addr,
127 int *addr_len){
128 unsigned long args[6];
130 args[0] = (unsigned long)fd;
131 args[1] = (unsigned long)ubuf;
132 args[2] = (unsigned long)size;
133 args[3] = (unsigned long)flags;
134 args[4] = (unsigned long)addr;
135 args[5] = (unsigned long)addr_len;
136 return socketcall(SYS_RECVFROM, args);
137 }
139 int setsockopt(int fd, int level, int optname, void *optval, int optlen){
140 unsigned long args[6];
142 args[0] = (unsigned long)fd;
143 args[1] = (unsigned long)level;
144 args[2] = (unsigned long)optname;
145 args[3] = (unsigned long)optval;
146 args[4] = (unsigned long)optlen;
147 return socketcall(SYS_SETSOCKOPT, args);
148 }
150 int getsockopt(int fd, int level, int optname, void *optval, int *optlen){
151 unsigned long args[6];
153 args[0] = (unsigned long)fd;
154 args[1] = (unsigned long)level;
155 args[2] = (unsigned long)optname;
156 args[3] = (unsigned long)optval;
157 args[4] = (unsigned long)optlen;
158 return socketcall(SYS_GETSOCKOPT, args);
159 }
161 int shutdown(int fd, int how){
162 unsigned long args[6];
164 args[0] = (unsigned long)fd;
165 args[1] = (unsigned long)how;
166 return socketcall(SYS_SHUTDOWN, args);
167 }
169 int getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len){
170 unsigned long args[6];
172 args[0] = (unsigned long)fd;
173 args[1] = (unsigned long)usockaddr;
174 args[2] = (unsigned long)usockaddr_len;
175 return socketcall(SYS_GETSOCKNAME, args);
176 }
178 /*============================================================================*/
/** Socket flags (bit mask values that can be or-ed together). */
enum VsockFlag {
    VSOCK_REUSE     = 1 << 0,
    VSOCK_BIND      = 1 << 1,
    VSOCK_CONNECT   = 1 << 2,
    VSOCK_BROADCAST = 1 << 3,
    VSOCK_MULTICAST = 1 << 4,
};

/** Render socket flags as a fixed-width string.
 * Each position shows the flag's letter when set and '-' when clear,
 * in the order connect, bind, reuse, broadcast, multicast ("cbrBM").
 *
 * @param flags flags
 * @return static string (overwritten by the next call)
 */
char * socket_flags(int flags){
    static const struct {
        int bit;
        char tag;
    } table[] = {
        { VSOCK_CONNECT,   'c' },
        { VSOCK_BIND,      'b' },
        { VSOCK_REUSE,     'r' },
        { VSOCK_BROADCAST, 'B' },
        { VSOCK_MULTICAST, 'M' },
    };
    enum { NFLAGS = sizeof(table) / sizeof(table[0]) };
    static char s[NFLAGS + 1];
    int k;

    for(k = 0; k < NFLAGS; k++){
        s[k] = ((flags & table[k].bit) ? table[k].tag : '-');
    }
    s[NFLAGS] = '\0';
    return s;
}
205 /** The varp multicast socket. */
206 int varp_mcast_sock = -1;
208 /** The varp unicast socket. */
209 int varp_ucast_sock = -1;
211 /** Control flag for whether varp should be running.
212 * If this is set 0 then the varp thread will notice and
213 * (eventually) exit. This is indicated by setting varp_running
214 * to 0.
215 */
216 atomic_t varp_run = ATOMIC_INIT(0);
218 /** State flag indicating whether the varp thread is running. */
219 atomic_t varp_running = ATOMIC_INIT(0);
221 /** Set socket option to reuse address.
222 *
223 * @param sock socket
224 * @param reuse flag
225 * @return 0 on success, error code otherwise
226 */
227 int setsock_reuse(int sock, int reuse){
228 int err = 0;
229 err = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse));
230 if(err < 0){
231 eprintf("> setsockopt SO_REUSEADDR: %d %d\n", err, errno);
232 }
233 return err;
234 }
236 /** Set socket broadcast option.
237 *
238 * @param sock socket
239 * @param bcast flag
240 * @return 0 on success, error code otherwise
241 */
242 int setsock_broadcast(int sock, int bcast){
243 int err = 0;
244 err = setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &bcast, sizeof(bcast));
245 if(err < 0){
246 eprintf("> setsockopt SO_BROADCAST: %d %d\n", err, errno);
247 }
248 return err;
249 }
251 /** Join a socket to a multicast group.
252 *
253 * @param sock socket
254 * @param saddr multicast address
255 * @return 0 on success, error code otherwise
256 */
257 int setsock_multicast(int sock, uint32_t saddr){
258 int err = 0;
259 struct ip_mreqn mreq = {};
260 int mloop = 0;
262 // See 'man 7 ip' for these options.
263 mreq.imr_multiaddr.s_addr = saddr; // IP multicast address.
264 mreq.imr_address.s_addr = INADDR_ANY; // Interface IP address.
265 mreq.imr_ifindex = 0; // Interface index (0 means any).
266 err = setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &mloop, sizeof(mloop));
267 if(err < 0){
268 eprintf("> setsockopt IP_MULTICAST_LOOP: %d %d\n", err, errno);
269 goto exit;
270 }
271 err = setsockopt(sock, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
272 if(err < 0){
273 eprintf("> setsockopt IP_ADD_MEMBERSHIP: %d %d\n", err, errno);
274 goto exit;
275 }
276 exit:
277 err = 0; //todo: remove hack
278 return err;
279 }
281 /** Set a socket's multicast ttl (default is 1).
282 * @param sock socket
283 * @param ttl ttl
284 * @return 0 on success, error code otherwise
285 */
286 int setsock_multicast_ttl(int sock, uint8_t ttl){
287 int err = 0;
288 err = setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl));
289 return err;
290 }
292 /** Create a socket.
293 * The flags can include values from enum VsockFlag.
294 *
295 * @param socktype socket type
296 * @param saddr address
297 * @param port port
298 * @param flags flags
299 * @param val return value for the socket connection
300 * @return 0 on success, error code otherwise
301 */
302 int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, int *val){
303 int err = 0;
304 int sock;
305 struct sockaddr_in addr_in;
306 struct sockaddr *addr = (struct sockaddr *)&addr_in;
307 int addr_n = sizeof(addr_in);
308 int reuse, bcast;
309 int sockproto = 0;
311 //dprintf(">\n");
312 reuse = (flags & VSOCK_REUSE);
313 bcast = (flags & VSOCK_BROADCAST);
314 addr_in.sin_family = AF_INET;
315 addr_in.sin_addr.s_addr = saddr;
316 addr_in.sin_port = port;
317 dprintf("> flags=%s addr=%u.%u.%u.%u port=%d\n",
318 socket_flags(flags),
319 NIPQUAD(saddr), ntohs(port));
321 switch(socktype){
322 case SOCK_DGRAM: sockproto = IPPROTO_UDP; break;
323 case SOCK_STREAM: sockproto = IPPROTO_TCP; break;
324 }
325 sock = socket(AF_INET, socktype, sockproto);
326 if(sock < 0) goto exit;
327 if(reuse){
328 err = setsock_reuse(sock, reuse);
329 if(err < 0) goto exit;
330 }
331 if(bcast){
332 err = setsock_broadcast(sock, bcast);
333 if(err < 0) goto exit;
334 }
335 if(flags & VSOCK_MULTICAST){
336 err = setsock_multicast(sock, saddr);
337 if(err < 0) goto exit;
338 }
339 if(flags & VSOCK_CONNECT){
340 err = connect(sock, addr, addr_n);
341 if(err < 0) goto exit;
342 }
343 if(flags & VSOCK_BIND){
344 err = bind(sock, addr, addr_n);
345 if(err < 0) goto exit;
346 }
347 exit:
348 *val = (err ? -1 : sock);
349 if(err) eprintf("> err=%d errno=%d\n", err, errno);
350 return err;
351 }
353 /** Open the varp multicast socket.
354 *
355 * @param mcaddr multicast address
356 * @param port port
357 * @param val return parameter for the socket
358 * @return 0 on success, error code otherwise
359 */
360 int varp_mcast_open(uint32_t mcaddr, uint16_t port, int *val){
361 int err = 0;
362 int flags = VSOCK_REUSE;
363 int multicast = MULTICAST(mcaddr);
364 int sock = 0;
366 dprintf(">\n");
367 flags |= VSOCK_MULTICAST;
368 flags |= VSOCK_BROADCAST;
370 err = create_socket(SOCK_DGRAM, mcaddr, port, flags, &sock);
371 if(err < 0) goto exit;
372 if(multicast){
373 err = setsock_multicast_ttl(sock, 1);
374 if(err < 0) goto exit;
375 }
376 exit:
377 if(err){
378 shutdown(sock, 2);
379 }
380 *val = (err ? -1 : sock);
381 dprintf("< err=%d val=%d\n", err, *val);
382 return err;
383 }
385 /** Open the varp unicast socket.
386 *
387 * @param addr address
388 * @param port port
389 * @param val return parameter for the socket
390 * @return 0 on success, error code otherwise
391 */
392 int varp_ucast_open(uint32_t addr, u16 port, int *val){
393 int err = 0;
394 int flags = (VSOCK_BIND | VSOCK_REUSE);
395 dprintf(">\n");
396 err = create_socket(SOCK_DGRAM, addr, port, flags, val);
397 dprintf("< err=%d val=%d\n", err, *val);
398 return err;
399 }
/* Fallback definition, needed here because sockfd_put is inline in 'socket.c'.
 * Drops the file reference held on the socket.
 */
#if !defined(sockfd_put)
#define sockfd_put(sock) fput((sock)->file)
#endif
406 /** Get the next skb from a socket's receive queue.
407 *
408 * @param fd socket file descriptor
409 * @return skb or NULL
410 */
411 static struct sk_buff *get_sock_skb(int fd){
412 int err = 0;
413 struct sk_buff *skb = NULL;
414 struct socket *sock = NULL;
416 sock = sockfd_lookup(fd, &err);
417 if (!sock){
418 dprintf("> no sock for fd=%d\n", fd);
419 goto exit;
420 }
421 skb = skb_dequeue(&sock->sk->SK_RECEIVE_QUEUE);
422 //skb = skb_recv_datagram(sock->sk, 0, 1, &recv_err);
423 sockfd_put(sock);
424 exit:
425 return skb;
426 }
/** Process the next skb on a socket (if any).
 * A dequeued skb is passed to varp_handle_message() and then freed.
 *
 * @param fd socket file descriptor
 * @return 1 if there was an skb, 0 otherwise
 */
static int handle_sock_skb(int fd){
    struct sk_buff *skb = get_sock_skb(fd);

    if(!skb){
        return 0;
    }
    dprintf("> skb fd=%d skb=%p\n", fd, skb);
    varp_handle_message(skb);
    kfree_skb(skb);
    return 1;
}
445 /** Add a wait queue to a socket.
446 *
447 * @param fd socket file descriptor
448 * @param waitq queue
449 * @return 0 on success, error code otherwise
450 */
451 int sock_add_wait_queue(int fd, wait_queue_t *waitq){
452 int err = 0;
453 struct socket *sock = NULL;
455 dprintf("> fd=%d\n", fd);
456 sock = sockfd_lookup(fd, &err);
457 if (!sock) goto exit;
458 add_wait_queue(sock->sk->SK_SLEEP, waitq);
459 sockfd_put(sock);
460 exit:
461 dprintf("< err=%d\n", err);
462 return err;
463 }
465 /** Remove a wait queue from a socket.
466 *
467 * @param fd socket file descriptor
468 * @param waitq queue
469 * @return 0 on success, error code otherwise
470 */
471 int sock_remove_wait_queue(int fd, wait_queue_t *waitq){
472 int err = 0;
473 struct socket *sock = NULL;
475 sock = sockfd_lookup(fd, &err);
476 if (!sock) goto exit;
477 remove_wait_queue(sock->sk->SK_SLEEP, waitq);
478 sockfd_put(sock);
479 exit:
480 return err;
481 }
483 /** Loop handling the varp sockets.
484 * We use kernel API for this (waitqueue, schedule_timeout) instead
485 * of select because the select syscall was returning EFAULT. Oh well.
486 *
487 * @param arg arguments
488 * @return exit code
489 */
490 int varp_main(void *arg){
491 int err = 0;
492 long timeout = 3 * HZ;
493 int count = 0;
494 int n = 0;
495 DECLARE_WAITQUEUE(mcast_wait, current);
496 DECLARE_WAITQUEUE(ucast_wait, current);
498 dprintf("> start\n");
499 atomic_set(&varp_running, 1);
500 err = sock_add_wait_queue(varp_mcast_sock, &mcast_wait);
501 err = sock_add_wait_queue(varp_ucast_sock, &ucast_wait);
502 for(n = 1; atomic_read(&varp_run) == 1; n++){
503 count = 0;
504 count += handle_sock_skb(varp_mcast_sock);
505 count += handle_sock_skb(varp_ucast_sock);
506 if(!count){
507 // No skbs were handled, so go back to sleep.
508 set_current_state(TASK_INTERRUPTIBLE);
509 schedule_timeout(timeout);
510 current->state = TASK_RUNNING;
511 }
512 }
513 sock_remove_wait_queue(varp_mcast_sock, &mcast_wait);
514 sock_remove_wait_queue(varp_ucast_sock, &ucast_wait);
515 atomic_set(&varp_running, 0);
516 //MOD_DEC_USE_COUNT;
517 dprintf("< stop err=%d\n", err);
518 return err;
519 }
521 /** Start the varp thread.
522 *
523 * @return 0 on success, error code otherwise
524 */
525 int varp_start(void){
526 int err = 0;
527 void *args = NULL;
528 int flags = 0;
529 long pid = 0;
531 dprintf(">\n");
532 //flags |= CLONE_VM;
533 flags |= CLONE_FS;
534 flags |= CLONE_FILES;
535 flags |= CLONE_SIGHAND;
536 atomic_set(&varp_run, 1);
537 atomic_set(&varp_running, 0);
538 pid = kernel_thread(varp_main, args, flags);
539 dprintf("< pid=%ld\n", pid);
540 return err;
541 }
543 /** Close the varp sockets and stop the thread handling them.
544 */
545 void varp_close(void){
546 mm_segment_t oldfs;
547 long timeout = 1 * HZ;
548 int tries = 10;
549 dprintf(">\n");
550 // Tell the varp thread to stop and wait a while for it.
551 atomic_set(&varp_run, 0);
552 while(atomic_read(&varp_running) && tries-- > 0){
553 set_current_state(TASK_INTERRUPTIBLE);
554 schedule_timeout(timeout);
555 current->state = TASK_RUNNING;
556 }
557 // Close the sockets.
558 oldfs = change_fs(KERNEL_DS);
559 if(varp_mcast_sock > 0){
560 shutdown(varp_mcast_sock, 2);
561 varp_mcast_sock = -1;
562 }
563 if(varp_ucast_sock > 0){
564 shutdown(varp_ucast_sock, 2);
565 varp_ucast_sock = -1;
566 }
567 set_fs(oldfs);
568 //MOD_DEC_USE_COUNT;
569 dprintf("<\n");
570 }
572 /** Open the varp sockets and start the thread handling them.
573 *
574 * @param mcaddr multicast address
575 * @param port port
576 * @return 0 on success, error code otherwise
577 */
578 int varp_open(u32 mcaddr, u16 port){
579 int err = 0;
580 mm_segment_t oldfs;
582 //MOD_INC_USE_COUNT;
583 dprintf("> mcaddr=%u.%u.%u.%u port=%u\n",
584 NIPQUAD(mcaddr), ntohs(port));
585 oldfs = change_fs(KERNEL_DS);
586 err = varp_mcast_open(mcaddr, port, &varp_mcast_sock);
587 if(err < 0 ) goto exit;
588 err = varp_ucast_open(INADDR_ANY, port, &varp_ucast_sock);
589 if(err < 0 ) goto exit;
590 set_fs(oldfs);
591 err = varp_start();
592 exit:
593 set_fs(oldfs);
594 if(err){
595 varp_close();
596 }
597 dprintf("< err=%d\n", err);
598 return err;
599 }