ia64/xen-unstable

view tools/vnet/vnet-module/varp_socket.c @ 6946:e703abaf6e3d

Add behaviour to the remove methods to remove the transaction's path itself. This allows us to write Remove(path) to remove the specified path rather than having to slice the path ourselves.
author emellor@ewan
date Sun Sep 18 14:42:13 2005 +0100 (2005-09-18)
parents 3233e7ecfa9f
children 06d84bf87159
line source
1 /*
2 * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; either version 2 of the License, or (at your
7 * option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free software Foundation, Inc.,
16 * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19 #include <linux/kernel.h>
20 #include <linux/types.h>
21 #include <linux/version.h>
23 #include <asm/uaccess.h>
24 #include <linux/net.h>
25 #include <linux/in.h>
26 #include <linux/sched.h>
27 #include <linux/file.h>
28 #include <linux/version.h>
29 #include <linux/smp_lock.h>
30 #include <net/sock.h>
32 #include <if_varp.h>
33 #include <varp.h>
35 /* Get macros needed to define system calls as functions in the kernel. */
36 #define __KERNEL_SYSCALLS__
37 static int errno;
38 #include <linux/unistd.h>
40 #define MODULE_NAME "VARP"
41 #define DEBUG 1
42 #undef DEBUG
43 #include "debug.h"
45 // Compensate for struct sock fields having 'sk_' added
46 // to them in 2.6.
47 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
49 #define SK_RECEIVE_QUEUE sk_receive_queue
50 #define SK_SLEEP sk_sleep
52 #else
54 #define SK_RECEIVE_QUEUE receive_queue
55 #define SK_SLEEP sleep
57 #endif
59 /** @file
60 * Support for the VARP udp sockets.
61 */
63 static inline mm_segment_t change_fs(mm_segment_t fs){
64 mm_segment_t oldfs = get_fs();
65 set_fs(fs);
66 return oldfs;
67 }
69 /* Replicate the user-space socket API.
70 * The parts we need anyway.
71 */
73 /* Define the socketcall() syscall.
74 * Multiplexes all the socket-related calls.
75 *
76 * @param call socket call id
77 * @param args arguments (upto 6)
78 * @return call-dependent value
79 */
80 static inline _syscall2(int, socketcall,
81 int, call,
82 unsigned long *, args)
84 int socket(int family, int type, int protocol){
85 unsigned long args[6];
87 args[0] = (unsigned long)family;
88 args[1] = (unsigned long)type;
89 args[2] = (unsigned long)protocol;
90 return socketcall(SYS_SOCKET, args);
91 }
93 int bind(int fd, struct sockaddr *umyaddr, int addrlen){
94 unsigned long args[6];
96 args[0] = (unsigned long)fd;
97 args[1] = (unsigned long)umyaddr;
98 args[2] = (unsigned long)addrlen;
99 return socketcall(SYS_BIND, args);
100 }
102 int connect(int fd, struct sockaddr *uservaddr, int addrlen){
103 unsigned long args[6];
105 args[0] = (unsigned long)fd;
106 args[1] = (unsigned long)uservaddr;
107 args[2] = (unsigned long)addrlen;
108 return socketcall(SYS_CONNECT, args);
109 }
111 int sendto(int fd, void * buff, size_t len,
112 unsigned flags, struct sockaddr *addr,
113 int addr_len){
114 unsigned long args[6];
116 args[0] = (unsigned long)fd;
117 args[1] = (unsigned long)buff;
118 args[2] = (unsigned long)len;
119 args[3] = (unsigned long)flags;
120 args[4] = (unsigned long)addr;
121 args[5] = (unsigned long)addr_len;
122 return socketcall(SYS_SENDTO, args);
123 }
125 int recvfrom(int fd, void * ubuf, size_t size,
126 unsigned flags, struct sockaddr *addr,
127 int *addr_len){
128 unsigned long args[6];
130 args[0] = (unsigned long)fd;
131 args[1] = (unsigned long)ubuf;
132 args[2] = (unsigned long)size;
133 args[3] = (unsigned long)flags;
134 args[4] = (unsigned long)addr;
135 args[5] = (unsigned long)addr_len;
136 return socketcall(SYS_RECVFROM, args);
137 }
139 int setsockopt(int fd, int level, int optname, void *optval, int optlen){
140 unsigned long args[6];
142 args[0] = (unsigned long)fd;
143 args[1] = (unsigned long)level;
144 args[2] = (unsigned long)optname;
145 args[3] = (unsigned long)optval;
146 args[4] = (unsigned long)optlen;
147 return socketcall(SYS_SETSOCKOPT, args);
148 }
150 int getsockopt(int fd, int level, int optname, void *optval, int *optlen){
151 unsigned long args[6];
153 args[0] = (unsigned long)fd;
154 args[1] = (unsigned long)level;
155 args[2] = (unsigned long)optname;
156 args[3] = (unsigned long)optval;
157 args[4] = (unsigned long)optlen;
158 return socketcall(SYS_GETSOCKOPT, args);
159 }
161 int shutdown(int fd, int how){
162 unsigned long args[6];
164 args[0] = (unsigned long)fd;
165 args[1] = (unsigned long)how;
166 return socketcall(SYS_SHUTDOWN, args);
167 }
169 int getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len){
170 unsigned long args[6];
172 args[0] = (unsigned long)fd;
173 args[1] = (unsigned long)usockaddr;
174 args[2] = (unsigned long)usockaddr_len;
175 return socketcall(SYS_GETSOCKNAME, args);
176 }
178 /*============================================================================*/
/** Socket flags accepted by create_socket(). Values are distinct bits. */
enum VsockFlag {
    VSOCK_REUSE = 1,
    VSOCK_BIND = 2,
    VSOCK_CONNECT = 4,
    VSOCK_BROADCAST = 8,
    VSOCK_MULTICAST = 16,
};

/** Render socket flags as a fixed 5-character string for logging.
 * Positions are, in order: 'c' connect, 'b' bind, 'r' reuse,
 * 'B' broadcast, 'M' multicast; an unset flag shows as '-'.
 *
 * @param flags flags (bits from enum VsockFlag)
 * @return pointer to a static buffer, overwritten by the next call
 */
char * socket_flags(int flags){
    static const struct {
        int  bit;
        char mark;
    } layout[] = {
        { VSOCK_CONNECT,   'c' },
        { VSOCK_BIND,      'b' },
        { VSOCK_REUSE,     'r' },
        { VSOCK_BROADCAST, 'B' },
        { VSOCK_MULTICAST, 'M' },
    };
    static char s[6];
    const unsigned int count = sizeof(layout) / sizeof(layout[0]);
    unsigned int k;

    for(k = 0; k < count; k++){
        s[k] = (flags & layout[k].bit) ? layout[k].mark : '-';
    }
    s[count] = '\0';
    return s;
}
205 /** The varp multicast socket. */
206 int varp_mcast_sock = -1;
208 /** The varp unicast socket. */
209 int varp_ucast_sock = -1;
211 /** Control flag for whether varp should be running.
212 * If this is set 0 then the varp thread will notice and
213 * (eventually) exit. This is indicated by setting varp_running
214 * to 0.
215 */
216 atomic_t varp_run = ATOMIC_INIT(0);
218 /** State flag indicating whether the varp thread is running. */
219 atomic_t varp_running = ATOMIC_INIT(0);
221 /** Set socket option to reuse address.
222 *
223 * @param sock socket
224 * @param reuse flag
225 * @return 0 on success, error code otherwise
226 */
227 int setsock_reuse(int sock, int reuse){
228 int err = 0;
229 err = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse));
230 if(err < 0){
231 eprintf("> setsockopt SO_REUSEADDR: %d %d\n", err, errno);
232 }
233 return err;
234 }
236 /** Set socket broadcast option.
237 *
238 * @param sock socket
239 * @param bcast flag
240 * @return 0 on success, error code otherwise
241 */
242 int setsock_broadcast(int sock, int bcast){
243 int err = 0;
244 err = setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &bcast, sizeof(bcast));
245 if(err < 0){
246 eprintf("> setsockopt SO_BROADCAST: %d %d\n", err, errno);
247 }
248 return err;
249 }
251 /** Join a socket to a multicast group.
252 *
253 * @param sock socket
254 * @param saddr multicast address
255 * @return 0 on success, error code otherwise
256 */
257 int setsock_multicast(int sock, uint32_t saddr){
258 int err = 0;
259 struct ip_mreqn mreq = {};
260 int mloop = 0;
262 // See 'man 7 ip' for these options.
263 mreq.imr_multiaddr.s_addr = saddr; // IP multicast address.
264 mreq.imr_address.s_addr = INADDR_ANY; // Interface IP address.
265 mreq.imr_ifindex = 0; // Interface index (0 means any).
266 err = setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &mloop, sizeof(mloop));
267 if(err < 0){
268 eprintf("> setsockopt IP_MULTICAST_LOOP: %d %d\n", err, errno);
269 goto exit;
270 }
271 err = setsockopt(sock, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
272 if(err < 0){
273 eprintf("> setsockopt IP_ADD_MEMBERSHIP: %d %d\n", err, errno);
274 goto exit;
275 }
276 exit:
277 err = 0; //todo: remove hack
278 return err;
279 }
281 /** Set a socket's multicast ttl (default is 1).
282 * @param sock socket
283 * @param ttl ttl
284 * @return 0 on success, error code otherwise
285 */
286 int setsock_multicast_ttl(int sock, uint8_t ttl){
287 int err = 0;
288 err = setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl));
289 return err;
290 }
292 /** Create a socket.
293 * The flags can include values from enum VsockFlag.
294 *
295 * @param socktype socket type
296 * @param saddr address
297 * @param port port
298 * @param flags flags
299 * @param val return value for the socket connection
300 * @return 0 on success, error code otherwise
301 */
302 int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, int *val){
303 int err = 0;
304 int sock;
305 struct sockaddr_in addr_in;
306 struct sockaddr *addr = (struct sockaddr *)&addr_in;
307 int addr_n = sizeof(addr_in);
308 int reuse, bcast;
309 int sockproto = 0;
311 //dprintf(">\n");
312 reuse = (flags & VSOCK_REUSE);
313 bcast = (flags & VSOCK_BROADCAST);
314 addr_in.sin_family = AF_INET;
315 addr_in.sin_addr.s_addr = saddr;
316 addr_in.sin_port = port;
317 dprintf("> flags=%s addr=%u.%u.%u.%u port=%d\n",
318 socket_flags(flags),
319 NIPQUAD(saddr), ntohs(port));
321 switch(socktype){
322 case SOCK_DGRAM: sockproto = IPPROTO_UDP; break;
323 case SOCK_STREAM: sockproto = IPPROTO_TCP; break;
324 }
325 sock = socket(AF_INET, socktype, sockproto);
326 if(sock < 0) goto exit;
327 if(reuse){
328 err = setsock_reuse(sock, reuse);
329 if(err < 0) goto exit;
330 }
331 if(bcast){
332 err = setsock_broadcast(sock, bcast);
333 if(err < 0) goto exit;
334 }
335 if(flags & VSOCK_MULTICAST){
336 err = setsock_multicast(sock, saddr);
337 if(err < 0) goto exit;
338 }
339 if(flags & VSOCK_CONNECT){
340 err = connect(sock, addr, addr_n);
341 if(err < 0) goto exit;
342 }
343 if(flags & VSOCK_BIND){
344 err = bind(sock, addr, addr_n);
345 if(err < 0) goto exit;
346 }
347 exit:
348 *val = (err ? -1 : sock);
349 if(err) eprintf("> err=%d errno=%d\n", err, errno);
350 return err;
351 }
353 /** Open the varp multicast socket.
354 *
355 * @param mcaddr multicast address
356 * @param port port
357 * @param val return parameter for the socket
358 * @return 0 on success, error code otherwise
359 */
360 int varp_mcast_open(uint32_t mcaddr, uint16_t port, int *val){
361 int err = 0;
362 int flags = VSOCK_REUSE;
363 int multicast = MULTICAST(mcaddr);
364 int sock = 0;
366 dprintf(">\n");
367 flags |= VSOCK_MULTICAST;
368 flags |= VSOCK_BROADCAST;
370 err = create_socket(SOCK_DGRAM, mcaddr, port, flags, &sock);
371 if(err < 0) goto exit;
372 if(multicast){
373 err = setsock_multicast_ttl(sock, 1);
374 if(err < 0) goto exit;
375 }
376 exit:
377 if(err){
378 shutdown(sock, 2);
379 }
380 *val = (err ? -1 : sock);
381 dprintf("< err=%d val=%d\n", err, *val);
382 return err;
383 }
385 /** Open the varp unicast socket.
386 *
387 * @param addr address
388 * @param port port
389 * @param val return parameter for the socket
390 * @return 0 on success, error code otherwise
391 */
392 int varp_ucast_open(uint32_t addr, u16 port, int *val){
393 int err = 0;
394 int flags = (VSOCK_BIND | VSOCK_REUSE);
395 dprintf(">\n");
396 err = create_socket(SOCK_DGRAM, addr, port, flags, val);
397 dprintf("< err=%d val=%d\n", err, *val);
398 return err;
399 }
/* Fallback definition of sockfd_put(): it is inline in 'socket.c', so
 * provide it here when no macro of that name is already defined.
 * Drops the file reference held on a socket.
 */
#if !defined(sockfd_put)
#define sockfd_put(s) fput((s)->file)
#endif
406 /** Get the next skb from a socket's receive queue.
407 *
408 * @param fd socket file descriptor
409 * @return skb or NULL
410 */
411 static struct sk_buff *get_sock_skb(int fd){
412 int err = 0;
413 struct sk_buff *skb = NULL;
414 struct socket *sock = NULL;
416 sock = sockfd_lookup(fd, &err);
417 if (!sock){
418 dprintf("> no sock for fd=%d\n", fd);
419 goto exit;
420 }
421 skb = skb_dequeue(&sock->sk->SK_RECEIVE_QUEUE);
422 //skb = skb_recv_datagram(sock->sk, 0, 1, &recv_err);
423 sockfd_put(sock);
424 exit:
425 return skb;
426 }
/** Handle the next skb on a socket (if any).
 * A dequeued skb is passed to varp_handle_message() and then freed here.
 *
 * @param fd socket file descriptor
 * @return 1 if there was an skb, 0 otherwise
 */
static int handle_sock_skb(int fd){
    struct sk_buff *skb = get_sock_skb(fd);

    if(!skb){
        return 0;
    }
    dprintf("> skb fd=%d skb=%p\n", fd, skb);
    varp_handle_message(skb);
    kfree_skb(skb);
    return 1;
}
445 /** Add a wait queue to a socket.
446 *
447 * @param fd socket file descriptor
448 * @param waitq queue
449 * @return 0 on success, error code otherwise
450 */
451 int sock_add_wait_queue(int fd, wait_queue_t *waitq){
452 int err = 0;
453 struct socket *sock = NULL;
455 dprintf("> fd=%d\n", fd);
456 sock = sockfd_lookup(fd, &err);
457 if (!sock) goto exit;
458 add_wait_queue(sock->sk->SK_SLEEP, waitq);
459 sockfd_put(sock);
460 exit:
461 dprintf("< err=%d\n", err);
462 return err;
463 }
465 /** Remove a wait queue from a socket.
466 *
467 * @param fd socket file descriptor
468 * @param waitq queue
469 * @return 0 on success, error code otherwise
470 */
471 int sock_remove_wait_queue(int fd, wait_queue_t *waitq){
472 int err = 0;
473 struct socket *sock = NULL;
475 sock = sockfd_lookup(fd, &err);
476 if (!sock) goto exit;
477 remove_wait_queue(sock->sk->SK_SLEEP, waitq);
478 sockfd_put(sock);
479 exit:
480 return err;
481 }
483 /** Loop handling the varp sockets.
484 * We use kernel API for this (waitqueue, schedule_timeout) instead
485 * of select because the select syscall was returning EFAULT. Oh well.
486 *
487 * @param arg arguments
488 * @return exit code
489 */
490 int varp_main(void *arg){
491 int err = 0;
492 long timeout = 3 * HZ;
493 int count = 0;
494 int n = 0;
495 DECLARE_WAITQUEUE(mcast_wait, current);
496 DECLARE_WAITQUEUE(ucast_wait, current);
498 dprintf("> start\n");
499 atomic_set(&varp_running, 1);
500 err = sock_add_wait_queue(varp_mcast_sock, &mcast_wait);
501 err = sock_add_wait_queue(varp_ucast_sock, &ucast_wait);
502 for(n = 1; atomic_read(&varp_run) == 1; n++){
503 count = 0;
504 count += handle_sock_skb(varp_mcast_sock);
505 count += handle_sock_skb(varp_ucast_sock);
506 if(!count){
507 // No skbs were handled, so go back to sleep.
508 set_current_state(TASK_INTERRUPTIBLE);
509 schedule_timeout(timeout);
510 current->state = TASK_RUNNING;
511 }
512 }
513 sock_remove_wait_queue(varp_mcast_sock, &mcast_wait);
514 sock_remove_wait_queue(varp_ucast_sock, &ucast_wait);
515 atomic_set(&varp_running, 0);
516 //MOD_DEC_USE_COUNT;
517 dprintf("< stop err=%d\n", err);
518 return err;
519 }
521 /** Start the varp thread.
522 *
523 * @return 0 on success, error code otherwise
524 */
525 int varp_start(void){
526 int err = 0;
527 void *args = NULL;
528 int flags = 0;
529 long pid = 0;
531 dprintf(">\n");
532 //flags |= CLONE_VM;
533 flags |= CLONE_FS;
534 flags |= CLONE_FILES;
535 flags |= CLONE_SIGHAND;
536 atomic_set(&varp_run, 1);
537 atomic_set(&varp_running, 0);
538 pid = kernel_thread(varp_main, args, flags);
539 dprintf("< pid=%ld\n", pid);
540 return err;
541 }
543 /** Close the varp sockets and stop the thread handling them.
544 */
545 void varp_close(void){
546 mm_segment_t oldfs;
547 long timeout = 1 * HZ;
548 int tries = 10;
549 dprintf(">\n");
550 // Tell the varp thread to stop and wait a while for it.
551 atomic_set(&varp_run, 0);
552 while(atomic_read(&varp_running) && tries-- > 0){
553 set_current_state(TASK_INTERRUPTIBLE);
554 schedule_timeout(timeout);
555 current->state = TASK_RUNNING;
556 }
557 // Close the sockets.
558 oldfs = change_fs(KERNEL_DS);
559 if(varp_mcast_sock > 0){
560 shutdown(varp_mcast_sock, 2);
561 varp_mcast_sock = -1;
562 }
563 if(varp_ucast_sock > 0){
564 shutdown(varp_ucast_sock, 2);
565 varp_ucast_sock = -1;
566 }
567 set_fs(oldfs);
568 //MOD_DEC_USE_COUNT;
569 dprintf("<\n");
570 }
572 /** Open the varp sockets and start the thread handling them.
573 *
574 * @param mcaddr multicast address
575 * @param port port
576 * @return 0 on success, error code otherwise
577 */
578 int varp_open(u32 mcaddr, u16 port){
579 int err = 0;
580 mm_segment_t oldfs;
582 //MOD_INC_USE_COUNT;
583 dprintf("> mcaddr=%u.%u.%u.%u port=%u\n",
584 NIPQUAD(mcaddr), ntohs(port));
585 oldfs = change_fs(KERNEL_DS);
586 err = varp_mcast_open(mcaddr, port, &varp_mcast_sock);
587 if(err < 0 ) goto exit;
588 err = varp_ucast_open(INADDR_ANY, port, &varp_ucast_sock);
589 if(err < 0 ) goto exit;
590 set_fs(oldfs);
591 err = varp_start();
592 exit:
593 set_fs(oldfs);
594 if(err){
595 varp_close();
596 }
597 dprintf("< err=%d\n", err);
598 return err;
599 }