ia64/xen-unstable

changeset 8813:71b0f00f6344

Update vnets to support UDP encapsulation, multicast forwarding
and optionally running in user-space.

Signed-off-by: Mike Wray <mike.wray@hp.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Feb 09 16:12:11 2006 +0100 (2006-02-09)
parents a0e7daa2df33
children c63083610678
files tools/vnet/00INSTALL tools/vnet/00README tools/vnet/Make.env tools/vnet/Makefile tools/vnet/doc/Makefile tools/vnet/doc/man/vn.pod.1 tools/vnet/doc/vnet-module.txt tools/vnet/doc/vnet-xend.txt tools/vnet/examples/Makefile tools/vnet/examples/vnet97.sxp tools/vnet/examples/vnet98.sxp tools/vnet/examples/vnet99.sxp tools/vnet/libxutil/Makefile tools/vnet/libxutil/hash_table.c tools/vnet/libxutil/hash_table.h tools/vnet/libxutil/mem_stream.c tools/vnet/libxutil/sxpr.h tools/vnet/libxutil/sxpr_parser.c tools/vnet/libxutil/sys_net.c tools/vnet/scripts/Makefile tools/vnet/scripts/vn tools/vnet/vnet-module/Makefile-2.6 tools/vnet/vnet-module/Makefile.ver tools/vnet/vnet-module/Makefile.vnet tools/vnet/vnet-module/esp.c tools/vnet/vnet-module/esp.h tools/vnet/vnet-module/etherip.c tools/vnet/vnet-module/etherip.h tools/vnet/vnet-module/if_etherip.h tools/vnet/vnet-module/if_varp.h tools/vnet/vnet-module/random.c tools/vnet/vnet-module/random.h tools/vnet/vnet-module/sa.c tools/vnet/vnet-module/sa.h tools/vnet/vnet-module/skb_context.h tools/vnet/vnet-module/skb_util.c tools/vnet/vnet-module/skb_util.h tools/vnet/vnet-module/sxpr_util.c tools/vnet/vnet-module/sxpr_util.h tools/vnet/vnet-module/timer_util.c tools/vnet/vnet-module/timer_util.h tools/vnet/vnet-module/tunnel.c tools/vnet/vnet-module/tunnel.h tools/vnet/vnet-module/varp.c tools/vnet/vnet-module/varp.h tools/vnet/vnet-module/varp_socket.c tools/vnet/vnet-module/varp_util.c tools/vnet/vnet-module/varp_util.h tools/vnet/vnet-module/vif.c tools/vnet/vnet-module/vif.h tools/vnet/vnet-module/vnet.c tools/vnet/vnet-module/vnet.h tools/vnet/vnet-module/vnet_dev.c tools/vnet/vnet-module/vnet_dev.h tools/vnet/vnet-module/vnet_eval.c tools/vnet/vnet-module/vnet_eval.h tools/vnet/vnet-module/vnet_forward.c tools/vnet/vnet-module/vnet_forward.h tools/vnet/vnet-module/vnet_ioctl.c tools/vnet/vnetd/Makefile tools/vnet/vnetd/connection.c tools/vnet/vnetd/connection.h tools/vnet/vnetd/list.h tools/vnet/vnetd/select.c 
tools/vnet/vnetd/select.h tools/vnet/vnetd/selector.c tools/vnet/vnetd/selector.h tools/vnet/vnetd/skbuff.c tools/vnet/vnetd/skbuff.h tools/vnet/vnetd/spinlock.c tools/vnet/vnetd/spinlock.h tools/vnet/vnetd/sys_kernel.h tools/vnet/vnetd/timer.c tools/vnet/vnetd/timer.h tools/vnet/vnetd/vnetd.c
line diff
     1.1 --- a/tools/vnet/00INSTALL	Thu Feb 09 16:09:00 2006 +0100
     1.2 +++ b/tools/vnet/00INSTALL	Thu Feb 09 16:12:11 2006 +0100
     1.3 @@ -1,3 +1,5 @@
     1.4 +This directory contains the implementation of vnets:
     1.5 +virtual private networks for virtual machines.
     1.6  
     1.7  make
     1.8          - compile in local dirs. The module is in vnet-module/vnet_module.ko.
     1.9 @@ -9,6 +11,12 @@ make dist
    1.10  make install
    1.11          - compile and install into system.
    1.12  
    1.13 +By default the makefiles expect this code to have been installed
    1.14 +in tools/vnet in a xen source tree. If compiling outside the xen
    1.15 +source tree, set XEN_ROOT to the location of the xen source.
    1.16 +You can do this in the environment or in a Make.local file
    1.17 +in the current directory (see Make.env for details).
    1.18 +
    1.19  The xen0 kernel must have been compiled before building the vnet module.
    1.20  The vnet module installs to
    1.21   /lib/modules/<kernel version>-xen0/kernel/xen/vnet_module.ko
     2.1 --- a/tools/vnet/00README	Thu Feb 09 16:09:00 2006 +0100
     2.2 +++ b/tools/vnet/00README	Thu Feb 09 16:12:11 2006 +0100
     2.3 @@ -1,10 +1,15 @@
     2.4  This directory contains the implementation of vnets:
     2.5  virtual private networks for virtual machines.
     2.6 -See doc/ for more information and examples/ for example
     2.7 -configurations.
     2.8  
     2.9 -The kernel module is in vnet-module/ and the vnet forwarding
    2.10 -daemon is in vnetd/. The vnetd daemon makes vnets work across
    2.11 -subnets when multicast routing is not available.
    2.12 +See 00INSTALL for build instructions, doc/ for more information
    2.13 +and examples/ for example configurations.
    2.14 +
    2.15 +The vnet implementation can be run using a kernel module
    2.16 +or a user-space daemon. The kernel module is in vnet-module/ and the
    2.17 +user-space daemon (varpd) is in vnetd/. The user-space daemon
    2.18 +needs the tun/tap kernel module. Vnets use multicast to find
    2.19 +virtual interfaces and support broadcast. Either implementation can
    2.20 +tunnel multicast packets to other implementations if wide-area
    2.21 +multicast routing is not available.
    2.22  
    2.23  Mike Wray <mike.wray@hp.com>
    2.24 \ No newline at end of file
     3.1 --- a/tools/vnet/Make.env	Thu Feb 09 16:09:00 2006 +0100
     3.2 +++ b/tools/vnet/Make.env	Thu Feb 09 16:12:11 2006 +0100
     3.3 @@ -1,6 +1,16 @@
     3.4  # -*- mode: Makefile; -*-
     3.5  
     3.6 -export XEN_ROOT        = $(shell cd $(VNET_ROOT)/../.. && pwd)
     3.7 +# Include any local overrides.
     3.8 +-include $(VNET_ROOT)/Make.local
     3.9 +
    3.10 +# If building vnets outside the xen source tree, set XEN_ROOT to the
    3.11 +# absolute path of the root of the xen source tree. Edit this file
    3.12 +# or set XEN_ROOT in Make.local, the make command line or
    3.13 +# the environment. For example put this in Make.local:
    3.14 +# export XEN_ROOT = $(shell cd ~/xen-unstable.hg && pwd)
    3.15 +
    3.16 +export XEN_ROOT ?= $(shell cd $(VNET_ROOT)/../.. && pwd)
    3.17 +
    3.18  export LINUX_SERIES   ?= 2.6
    3.19  
    3.20  DISTDIR               ?= $(XEN_ROOT)/dist
    3.21 @@ -10,11 +20,9 @@ export VNET_MODULE_DIR = $(VNET_ROOT)/vn
    3.22  export VNETD_DIR       = $(VNET_ROOT)/vnetd
    3.23  export LIBXUTIL_DIR    = $(VNET_ROOT)/libxutil
    3.24  
    3.25 +
    3.26  export GC_DIR          = $(VNET_ROOT)/build/gc
    3.27  export GC_INCLUDE      = $(GC_DIR)/include
    3.28  export GC_LIB_DIR      = $(GC_DIR)/lib
    3.29  export GC_LIB_A        = $(GC_LIB_DIR)/libgc.a
    3.30  export GC_LIB_SO       = $(GC_LIB_DIR)/libgc.so
    3.31 -
    3.32 -#$(warning XEN_ROOT  = $(XEN_ROOT))
    3.33 -#$(warning DESTDIR   = $(DESTDIR))
     4.1 --- a/tools/vnet/Makefile	Thu Feb 09 16:09:00 2006 +0100
     4.2 +++ b/tools/vnet/Makefile	Thu Feb 09 16:12:11 2006 +0100
     4.3 @@ -7,9 +7,11 @@ endif
     4.4  
     4.5  .PHONY: all compile install dist clean pristine
     4.6  .PHONY: gc-all gc-install gc-clean
     4.7 +.PHONY: help
     4.8  
     4.9  SUBDIRS:=
    4.10  SUBDIRS+= examples
    4.11 +SUBDIRS+= scripts
    4.12  SUBDIRS+= gc
    4.13  SUBDIRS+= libxutil
    4.14  SUBDIRS+= vnetd
    4.15 @@ -60,3 +62,21 @@ clean: $(call subtgt,clean)
    4.16  
    4.17  pristine: clean
    4.18  	-@$(RM) gc.tar.gz
    4.19 +
    4.20 +help:
    4.21 +	@echo 'Cleaning targets:'
    4.22 +	@echo '  clean     - clean subdirs and remove the build dir'
    4.23 +	@echo '  pristine  - clean, then remove the gc tarball'
    4.24 +	@echo ''
    4.25 +	@echo 'Installation targets:'
    4.26 +	@echo '  install   - build and install relative to /'
    4.27 +	@echo '  dist      - build and install relative to DESTDIR (default XEN_ROOT/dist/install)'
    4.28 +	@echo ''
    4.29 +	@echo 'Compilation targets:'
    4.30 +	@echo '  all       - same as compile'
    4.31 +	@echo '  compile   - build everything'
    4.32 +	@echo ''
    4.33 +	@echo 'To build everything locally use "make" or "make all"'.
    4.34 +	@echo 'To build and install into XEN_ROOT/dist/install use "make dist".'
    4.35 +	@echo 'To build and install into the system use "make install".'
    4.36 +	@echo 'See ./00README and ./00INSTALL for more information.'
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/tools/vnet/doc/Makefile	Thu Feb 09 16:12:11 2006 +0100
     5.3 @@ -0,0 +1,50 @@
     5.4 +#!/usr/bin/make -f
     5.5 +# -*- mode: Makefile; -*-
     5.6 +
     5.7 +VERSION = 1.0
     5.8 +HEADER  = Vnet
     5.9 +
    5.10 +INSTALL		= install
    5.11 +INSTALL_DIR	= $(INSTALL) -d -m0755
    5.12 +
    5.13 +PS2PDF		:= ps2pdf
    5.14 +DVIPS		:= dvips
    5.15 +LATEX		:= latex
    5.16 +LATEX2HTML	:= latex2html
    5.17 +DOXYGEN		:= doxygen
    5.18 +POD2MAN		:= pod2man
    5.19 +
    5.20 +MAN_DIR		:= /usr/share/man
    5.21 +
    5.22 +DOC_MAN5SRC	:= $(wildcard man/*.pod.5)
    5.23 +DOC_MAN1SRC	:= $(wildcard man/*.pod.1)
    5.24 +DOC_MAN1	:= $(patsubst man/%.pod.1,man1/%.1,$(DOC_MAN1SRC))
    5.25 +DOC_MAN5	:= $(patsubst man/%.pod.5,man5/%.5,$(DOC_MAN5SRC))
    5.26 +
    5.27 +.PHONY: all man clean install
    5.28 +
    5.29 +all: man
    5.30 +
    5.31 +man:
    5.32 +	@if which $(POD2MAN) 1>/dev/null 2>/dev/null; then \
    5.33 +	$(MAKE) $(DOC_MAN1) $(DOC_MAN5); fi
    5.34 +
    5.35 +man1/%.1: man/%.pod.1 Makefile
    5.36 +	$(INSTALL_DIR) $(@D)
    5.37 +	$(POD2MAN) --release=$(VERSION) --name=`echo $@ | sed 's/^man1.//'| \
    5.38 +		sed 's/.1//'` -s 1 -c $(HEADER) $< $@
    5.39 +
    5.40 +man5/%.5: man/%.pod.5 Makefile
    5.41 +	$(INSTALL_DIR) $(@D)
    5.42 +	$(POD2MAN) --release=$(VERSION) --name=`echo $@ | sed 's/^man5.//'| \
    5.43 +		sed 's/.5//'` -s 5 -c $(HEADER) $< $@
    5.44 +
    5.45 +clean:
    5.46 +	@$(RM) -rf man5
    5.47 +	@$(RM) -rf man1
    5.48 +
    5.49 + install: all
    5.50 +	$(INSTALL_DIR) $(DESTDIR)$(MAN_DIR)
    5.51 +	$(CP) -dR man1 $(DESTDIR)$(MAN_DIR)
    5.52 +	$(CP) -dR man5 $(DESTDIR)$(MAN_DIR)
    5.53 +
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/tools/vnet/doc/man/vn.pod.1	Thu Feb 09 16:12:11 2006 +0100
     6.3 @@ -0,0 +1,176 @@
     6.4 +=head1 NAME
     6.5 +
     6.6 +vn - Vnet (virtual networking) management utility.
     6.7 +
     6.8 +=head1 SYNOPSIS
     6.9 +
    6.10 +vn <command> [args]
    6.11 +
    6.12 +=head1 DESCRIPTION
    6.13 +
    6.14 +The B<vn> utility manages vnets, virtual networks for virtual machines.
    6.15 +Before using vnets, the vnet kernel module must be installed or
    6.16 +the user-space daemon vnetd must be running. Using the kernel module is recommended,
    6.17 +see the B<insmod> command below.
    6.18 +
    6.19 +A vnet is a virtual network that behaves like a private LAN, transporting
    6.20 +Ethernet frames. Each vnet is identified by a 128-bit vnet id and
    6.21 +has a network device that interfaces to it. Ethernet packets written
    6.22 +to the device are encapsulated and sent to the network.
    6.23 +Received vnet packets are decapsulated and delivered from the device
    6.24 +corresponding to their vnet id. The default encapsulation uses UDP on port 1798.
    6.25 +
    6.26 +Usually each vnet device is enslaved to a corresponding bridge, and virtual
    6.27 +machine interfaces are attached to vnets by enslaving them to the bridge.
    6.28 +Each vnet behaves like a private LAN: traffic on one vnet is not visible
    6.29 +on other vnets, and interfaces on a vnet cannot see traffic on the
    6.30 +physical network. 
    6.31 +
    6.32 +Vnets can be connected together into larger networks
    6.33 +by direct bridging or packet forwarding, or by using multihomed vms
    6.34 +with interfaces on several vnets, or vnets and the physical network.
    6.35 +As vnet interfaces are discovered dynamically, vnet connectivity is maintained
    6.36 +if a vm using a vnet is migrated from one physical machine to another.
    6.37 +
    6.38 +In the commands vnet ids can be given in two forms. Long form, as 8 4-digit hex fields
    6.39 +separated by colons, for example 0000:0000:0000:0000:0000:0000:0000:0004, and
    6.40 +short form as a hex field, for example 0004 or 4. The short form is the same as the
    6.41 +long form with the first 7 fields zero. Vnet id 0000:0000:0000:0000:0000:0000:0000:0001
    6.42 +is reserved for the physical network and has no vnet device.
    6.43 +
    6.44 +Vnets use multicast to discover the location of virtual interfaces, by default
    6.45 +using multicast group 224.10.0.1. If all the machines hosting vnets are on
    6.46 +the same subnet, or reachable by multicast, vnets will span all the machines
    6.47 +automatically. If some machines are not reachable by multicast you can configure
    6.48 +vnets to perform multicast forwarding using UDP. 
    6.49 +
    6.50 +The vnet devices are fully-functional network devices, so you can add IP addresses
    6.51 +to them and test connectivity without any vms running.
    6.52 +For example, using vnif0004 on machines A and B:
    6.53 +
    6.54 +        A> ifconfig vnif0004 10.0.0.11
    6.55 +        B> ifconfig vnif0004 10.0.0.12
    6.56 +        B> ping 10.0.0.11
    6.57 +
    6.58 +If the vnet device is enslaved to a bridge you will have to add the IP address
    6.59 +to the bridge instead. Use C<brctl show> or C<vn vnets> to see if a vnet
    6.60 +device is on a bridge.
    6.61 +
    6.62 +=over 4
    6.63 +
    6.64 +=item B<insmod> I<[varp_mcaddr=ADDR]>
    6.65 +
    6.66 +Insert the vnet kernel module, optionally supplying the multicast
    6.67 +address to use, default 224.10.0.1.
    6.68 +
    6.69 +=item B<varp>
    6.70 +
    6.71 +Print varp information and varp cache.
    6.72 +
    6.73 +=item B<vnets> [options]
    6.74 +
    6.75 +Print the list of vnets (virtual networks). If a vnet device is on a bridge,
    6.76 +also shows the bridge and its bridged interfaces.
    6.77 +
    6.78 +=over 4
    6.79 +
    6.80 +=item B<-a | --all>
    6.81 +
    6.82 +Also print the vifs on each vnet and varp information.
    6.83 +
    6.84 +=item B<-l | --long>
    6.85 +
    6.86 +Also print the ifconfig for the vnet devices.
    6.87 +
    6.88 +=back
    6.89 +
    6.90 +=item B<vnet-create> I<[options]> I<vnetid>
    6.91 +
    6.92 +Create a vnet with the given id. The options are:
    6.93 +
    6.94 +=over 4
    6.95 +
    6.96 +=item B<-s | --security> I<level>
    6.97 +
    6.98 +Security level, which can be one of I<none> for no security,
    6.99 +I<auth> for message authentication, and I<conf> for message
   6.100 +authentication and confidentiality. The default is no security.
   6.101 +Security is provided using IPSEC, but uses hard-wired keys.
   6.102 +
   6.103 +=item B<-b | --bridge> I<bridgename>
   6.104 +
   6.105 +Create a bridge for the vnet called I<bridgename> and enslave
   6.106 +the vnet device to it.
   6.107 +
   6.108 +=item B<-v | --vnetif> I<vnetifname>
   6.109 +
   6.110 +Use I<vnetifname> as the name for the vnet device. If this option
   6.111 +is not specified the default is to name the device vnifN where N
   6.112 +is the last field of the vnet id as 4 hex characters.
   6.113 +For example vnif0004. Network device names can be at
   6.114 +most 14 characters.
   6.115 +
   6.116 +=back
   6.117 +
   6.118 +=item B<vnet-delete> I<[options]> I<vnetid>
   6.119 +
   6.120 +Delete the vnet with the given id. The vnet device goes away too.
   6.121 +
   6.122 +=over 4
   6.123 +
   6.124 +=item B<-b | --bridge>
   6.125 +
   6.126 +If this option is specified, delete the bridge associated with the vnet.
   6.127 +
   6.128 +=back
   6.129 +
   6.130 +=item B<vifs>
   6.131 +
   6.132 +Print the list of vifs (virtual interfaces).
   6.133 +
   6.134 +=item B<vif-add> I<[-i|--interface]> I<vnet> I<vmac>
   6.135 +
   6.136 +Add a vif to a vnet. Here I<vnet> is the vnet id and I<vmac>
   6.137 +is the vif's MAC address. Alternatively, I<vmac> can be the name of
   6.138 +a network device if the I<-i> or I<--interface> flag is given.
   6.139 +
   6.140 +It is not usually necessary to use B<vif-add> as vnets automatically
   6.141 +add vifs for the MAC addresses they see.
   6.142 +
   6.143 +=item B<vif-delete> I<[-i|--interface]> I<vnet> I<vmac>
   6.144 +
   6.145 +Delete a vif from a vnet. Here I<vnet> is the vnet id and I<vmac>
   6.146 +is the vif's MAC address. Alternatively, I<vmac> can be the name of
   6.147 +a network device if the I<-i> or I<--interface> flag is given.
   6.148 +
   6.149 +It is not usually necessary to use B<vif-delete> as vnets periodically
   6.150 +delete unused vifs.
   6.151 +
   6.152 +=item B<peers>
   6.153 +
   6.154 +Print the list of peer vnet machines to forward multicasts to, and accept
   6.155 +forwarded multicasts from.
   6.156 +
   6.157 +=item B<peer-add> I<addr>
   6.158 +
   6.159 +Add the peer with the given IP address or hostname.
   6.160 +
   6.161 +=item B<peer-delete> I<addr>
   6.162 +
   6.163 +Delete the peer with the given IP address or hostname.
   6.164 +
   6.165 +=back
   6.166 +
   6.167 +=head1 AUTHOR
   6.168 +
   6.169 +The author of vn and vnets is Mike Wray of HP Labs. Please send problems, bugs,
   6.170 +enhancement requests etc. to mike.wray@hp.com.
   6.171 +
   6.172 +=head1 COPYRIGHT AND LICENSE
   6.173 +
   6.174 +Copyright (C) 2006 Mike Wray <mike.wray@hp.com>.
   6.175 +
   6.176 +This library is free software; you can redistribute it and/or modify
   6.177 +it under the terms of the GNU Lesser General Public License as published by
   6.178 +the Free Software Foundation; either version 2.1 of the License, or
   6.179 +(at your option) any later version.
   6.180 \ No newline at end of file
     7.1 --- a/tools/vnet/doc/vnet-module.txt	Thu Feb 09 16:09:00 2006 +0100
     7.2 +++ b/tools/vnet/doc/vnet-module.txt	Thu Feb 09 16:12:11 2006 +0100
     7.3 @@ -1,11 +1,14 @@
     7.4 -Vnet Module Command Interface
     7.5 +Vnet Low-level Command Interface
     7.6  Mike Wray <mike.wray@hp.com>
     7.7 -2005/08/25
     7.8 +2006/10/12
     7.9  
    7.10 -When insmod the vnet-module creates /proc/vnet/policy which
    7.11 -can be used to control the module by writing commands into it.
    7.12 -The return code from the command should be returned by close.
    7.13 -Xend uses these commands to implement its vnet interface.
    7.14 +The vnet kernel module and user-space daemon vnetd support a low-level
    7.15 +command interface to control vnets. The kernel module creates /proc/vnet/policy,
    7.16 +which is used by writing commands into it. Vnetd listens on the unix-domain
    7.17 +socket /tmp/vnetd.
    7.18 +
    7.19 +The vn utility in ../scripts provides a higher-level interface to
    7.20 +the vnet commands (using the kernel module or vnetd).
    7.21  
    7.22  The commands are:
    7.23  
    7.24 @@ -37,33 +40,33 @@ Delete the vnet with id <id>.
    7.25  
    7.26  Add the vif with MAC address <macaddr> to the vnet with id <vnetid>.
    7.27  This makes the vnet module respond to VARP requests for <macaddr>
    7.28 -on vnet <vnetid>.
    7.29 +on vnet <vnetid>. The vnet implementation learns MAC addresses
    7.30 +so doing this should not be necessary.
    7.31  
    7.32  (vif.del (vnet <vnetid>) (vmac <macaddr>))
    7.33  
    7.34  Remove the vif with MAC address <macaddr> from the vnet with id <vnetid>.
    7.35  The vnet module will stop responding to VARP for the vif.
    7.36  
    7.37 -(vif.print)
    7.38 -
    7.39 -Print the known vnets, vifs and varp cache on the console.
    7.40 -
    7.41 -Examples:
    7.42 -
    7.43 -To create vnet 10 with no security:
    7.44 -
    7.45 -echo '(vnet.add (id 10))' > /proc/vnet/policy
    7.46 +(peer.add (addr <addr>))
    7.47  
    7.48 -This creates a device vnif0010.
    7.49 -
    7.50 -To create vnet 11 with message authentication:
    7.51 -
    7.52 -echo '(vnet.add (id 11) (security auth))' > /proc/vnet/policy
    7.53 +Add a peer at IP address <addr> to forward multicasts to,
    7.54 +and accept forwarded multicasts from.
    7.55  
    7.56 -To add the vif with vmac "aa:00:00:bc:34:ae" to vnet 10:
    7.57 +(peer.del (addr <addr>))
    7.58  
    7.59 -echo '(vif.add (vnet 10) (vmac aa:00:00:bc:34:ae))' > /proc/vnet/policy
    7.60 +Delete a peer.
    7.61  
    7.62 -To remove the vif from the vnet:
    7.63 +(vif.list)  - get list of vifs.
    7.64 +(vnet.list) - get list of vnets.
    7.65 +(varp.list) - get vnet/varp info.
    7.66 +(peer.list) - get list of peers.
    7.67  
    7.68 -echo '(vif.del (vnet 10) (vmac aa:00:00:bc:34:ae))' > /proc/vnet/policy
    7.69 +The kernel module produces output on the console, and vnetd
    7.70 +returns output on the unix socket. The kernel module also provides
    7.71 +the following files which can be read to get information:
    7.72 +
    7.73 +/proc/vnet/vifs  - get list of vifs.
    7.74 +/proc/vnet/vnets - get list of vnets.
    7.75 +/proc/vnet/varp  - get vnet/varp info.
    7.76 +/proc/vnet/peers - get list of peers.
     8.1 --- a/tools/vnet/doc/vnet-xend.txt	Thu Feb 09 16:09:00 2006 +0100
     8.2 +++ b/tools/vnet/doc/vnet-xend.txt	Thu Feb 09 16:12:11 2006 +0100
     8.3 @@ -3,7 +3,7 @@ Vnets: Virtual Networks for Virtual Mach
     8.4  
     8.5  Mike Wray <mike.wray@hp.com>
     8.6  
     8.7 -2005/08/25
     8.8 +2005/12/13
     8.9  
    8.10  0) Introduction
    8.11  ---------------
    8.12 @@ -15,7 +15,7 @@ see the real network, and the real netwo
    8.13  
    8.14  Virtual interfaces on the same vnet can be on the same machine
    8.15  or on different machines, they can still talk. The hosting machines
    8.16 -can even be on different subnets if you run vnetd to forward,
    8.17 +can even be on different subnets if you configure vnet forwarding,
    8.18  or have multicast routing enabled.
    8.19  
    8.20  
    8.21 @@ -34,7 +34,7 @@ Configure the network script:
    8.22  
    8.23  Restart xend.
    8.24  
    8.25 -Alternatively insert the vnet module using vnet-insert,
    8.26 +Alternatively insert the vnet module using 'vn insmod',
    8.27  preferably before xend starts.
    8.28  
    8.29  2) Creating vnets
    8.30 @@ -47,14 +47,14 @@ xm vnet-create <vnet config file>
    8.31  
    8.32  For example, if vnet97.sxp contains:
    8.33  
    8.34 -(vnet (id 97) (bridge vnet97) (vnetif vnetif97) (security none))
    8.35 +(vnet (id 97) (bridge vnet97) (vnetif vnif97) (security none))
    8.36  
    8.37  do
    8.38  
    8.39  xm vnet-create vnet97.sxp
    8.40  
    8.41  This will define a vnet with id 97 and no security. The bridge for the
    8.42 -vnet is called vnet97 and the virtual interface for it is vnetif97.
    8.43 +vnet is called vnet97 and the virtual interface for it is vnif97.
    8.44  To add an interface on a vm to this vnet simply set its bridge to vnet97
    8.45  in its configuration.
    8.46  
    8.47 @@ -66,6 +66,22 @@ In sxp:
    8.48  
    8.49  (dev (vif (mac aa:00:00:01:02:03) (bridge vnet97)))
    8.50  
    8.51 +By default vnets use udp encapsulation, but if you use etherip encapsulation
    8.52 +you will also have to reduce the MTU of the corresponding
    8.53 +device in the domain (because of the tunneling). Reducing the MTU may improve
    8.54 +performance for udp encapsulation, but is not necessary.
    8.55 +
    8.56 +For example, for eth0 (in the domain, not dom0) use
    8.57 +
    8.58 +ifconfig eth0 mtu 1400
    8.59 +
    8.60 +or, better, put
    8.61 +
    8.62 +MTU=1400
    8.63 +
    8.64 +in /etc/sysconfig/network-scripts/ifcfg-eth0. You may also have to change or remove
    8.65 +cached config files for eth0 under /etc/sysconfig/networking.
    8.66 +
    8.67  Once configured, vnets are persistent in the xend database.
    8.68  To remove a vnet use
    8.69  
    8.70 @@ -75,9 +91,13 @@ To list vnets use
    8.71  
    8.72  xm vnet-list
    8.73  
    8.74 -To get information on a vnet id use
    8.75 +To get information on one or more vnet ids use
    8.76  
    8.77 -xm vnet-list <vnet id>
    8.78 +xm vnet-list <vnet id>...
    8.79 +
    8.80 +You can also manage vnets using the vn utility which talks
    8.81 +directly to the vnet implementation. The source is in ../scripts/vn
    8.82 +and is installed in /usr/sbin/vn.
    8.83  
    8.84  3) Troubleshooting
    8.85  ------------------
    8.86 @@ -87,21 +107,19 @@ If a vnet has been configured it should 
    8.87  Its bridge and interface should appear in 'ifconfig'.
    8.88  It should also show in 'brctl show', with its attached interfaces.
    8.89  
    8.90 -You can 'see into' a vnet from dom0 if you put an IP address on the bridge
    8.91 -and configure its MAC address as a vif.
    8.92 -For example, if you have vnet97 with a vm with ip addr 10.0.0.12 on it,
    8.93 -and <mac> is the MAC address of vnet97 (use ifconfig), then
    8.94 +You can 'see into' a vnet from dom0 if you put an IP address on the bridge.
    8.95 +For example, if you have vnet97 and a vm with ip addr 10.0.0.12 connected to it,
    8.96 +then
    8.97  
    8.98 -echo '(vif.add (vnet 97) (vmac <mac>))' >/proc/vnet/policy
    8.99  ifconfig vnet97 10.0.0.20 up
   8.100  
   8.101  should let you ping 10.0.0.12 via the vnet97 bridge.
   8.102 -This works even if the vm with vif 10.0.0.12 is on another
   8.103 -machine (it only works locally if you don't use vif.add).
   8.104  
   8.105  4) Examples
   8.106  -----------
   8.107  
   8.108 +These assume a vnet with a bridge 'vnet97' has been created.
   8.109 +
   8.110  Here's the full config for a vm on vnet 97, using ip addr 10.0.0.12:
   8.111  
   8.112  (vm
   8.113 @@ -143,7 +161,7 @@ If you run another vm on the same vnet:
   8.114  the vms should be able to talk over the vnet. Check with ping.
   8.115  If they are both on the same machine the connection will simply
   8.116  be the vnet97 bridge, if they are on separate machines their
   8.117 -packets will be tunneled in etherip. They should be able to
   8.118 +packets will be tunneled in udp (or etherip). They should be able to
   8.119  see each other, but not the real network.
   8.120  
   8.121  
     9.1 --- a/tools/vnet/examples/Makefile	Thu Feb 09 16:09:00 2006 +0100
     9.2 +++ b/tools/vnet/examples/Makefile	Thu Feb 09 16:12:11 2006 +0100
     9.3 @@ -1,15 +1,19 @@
     9.4  # -*- mode: Makefile; -*-
     9.5  #============================================================================
     9.6  
     9.7 -XEN_SCRIPT_DIR:=/etc/xen/scripts
     9.8 +INSTALL		= install
     9.9 +INSTALL_PROG	= $(INSTALL) -m0755
    9.10 +INSTALL_DIR	= $(INSTALL) -d -m0755
    9.11 +
    9.12 +XEN_SCRIPT_DIR  = $(DESTDIR)/etc/xen/scripts
    9.13  
    9.14  .PHONY: all install clean
    9.15  
    9.16  all:
    9.17  
    9.18  install:
    9.19 -	install -m 0755 -d $(DESTDIR)$(XEN_SCRIPT_DIR)
    9.20 -	install -m 0554 network-vnet $(DESTDIR)$(XEN_SCRIPT_DIR)
    9.21 -	install -m 0554 vnet-insert $(DESTDIR)$(XEN_SCRIPT_DIR)
    9.22 +	$(INSTALL_DIR) $(XEN_SCRIPT_DIR)
    9.23 +	$(INSTALL_PROG) network-vnet $(XEN_SCRIPT_DIR)
    9.24 +	$(INSTALL_PROG) vnet-insert $(XEN_SCRIPT_DIR)
    9.25  
    9.26  clean:
    9.27 \ No newline at end of file
    10.1 --- a/tools/vnet/examples/vnet97.sxp	Thu Feb 09 16:09:00 2006 +0100
    10.2 +++ b/tools/vnet/examples/vnet97.sxp	Thu Feb 09 16:12:11 2006 +0100
    10.3 @@ -1,2 +1,2 @@
    10.4  # Vnet configuration for a vnet with id 97 and no security.
    10.5 -(vnet (id 97) (bridge vnet97) (vnetif vnetif97) (security none))
    10.6 +(vnet (id 97) (bridge vnet97) (vnetif vnif97) (security none))
    11.1 --- a/tools/vnet/examples/vnet98.sxp	Thu Feb 09 16:09:00 2006 +0100
    11.2 +++ b/tools/vnet/examples/vnet98.sxp	Thu Feb 09 16:12:11 2006 +0100
    11.3 @@ -1,2 +1,2 @@
    11.4  # Vnet configuration for a vnet with id 98 and message authentication.
    11.5 -(vnet (id 98) (bridge vnet98) (vnetif vnetif98) (security auth))
    11.6 +(vnet (id 98) (bridge vnet98) (vnetif vnif98) (security auth))
    12.1 --- a/tools/vnet/examples/vnet99.sxp	Thu Feb 09 16:09:00 2006 +0100
    12.2 +++ b/tools/vnet/examples/vnet99.sxp	Thu Feb 09 16:12:11 2006 +0100
    12.3 @@ -1,2 +1,2 @@
    12.4  # Vnet configuration for a vnet with id 99 and message confidentiality.
    12.5 -(vnet (id 99) (bridge vnet99) (vnetif vnetif99) (security conf))
    12.6 +(vnet (id 99) (bridge vnet99) (vnetif vnif99) (security conf))
    13.1 --- a/tools/vnet/libxutil/Makefile	Thu Feb 09 16:09:00 2006 +0100
    13.2 +++ b/tools/vnet/libxutil/Makefile	Thu Feb 09 16:12:11 2006 +0100
    13.3 @@ -14,7 +14,7 @@ LIB_SRCS :=
    13.4  LIB_SRCS += allocate.c
    13.5  LIB_SRCS += enum.c
    13.6  LIB_SRCS += file_stream.c
    13.7 -LIB_SRCS += gzip_stream.c
    13.8 +#LIB_SRCS += gzip_stream.c
    13.9  LIB_SRCS += hash_table.c
   13.10  LIB_SRCS += iostream.c
   13.11  LIB_SRCS += lexis.c
   13.12 @@ -45,9 +45,11 @@ LIB      += libxutil.a
   13.13  
   13.14  all: build
   13.15  
   13.16 -build: check-for-zlib
   13.17 +build: #check-for-zlib
   13.18  	$(MAKE) $(LIB)
   13.19  
   13.20 +gzip_stream.o: check-for-zlib
   13.21 +
   13.22  libxutil.so: libxutil.so.$(MAJOR)
   13.23  	ln -sf $^ $@
   13.24  
    14.1 --- a/tools/vnet/libxutil/hash_table.c	Thu Feb 09 16:09:00 2006 +0100
    14.2 +++ b/tools/vnet/libxutil/hash_table.c	Thu Feb 09 16:12:11 2006 +0100
    14.3 @@ -1,5 +1,5 @@
    14.4  /*
    14.5 - * Copyright (C) 2001 - 2004 Mike Wray <mike.wray@hp.com>
    14.6 + * Copyright (C) 2001 - 2005 Mike Wray <mike.wray@hp.com>
    14.7   *
    14.8   * This library is free software; you can redistribute it and/or modify
    14.9   * it under the terms of the GNU Lesser General Public License as published by
   14.10 @@ -26,8 +26,6 @@
   14.11  #  include <stddef.h>
   14.12  #endif
   14.13  
   14.14 -//#include <limits.h>
   14.15 -
   14.16  #include "allocate.h"
   14.17  #include "hash_table.h"
   14.18  
   14.19 @@ -40,86 +38,129 @@
   14.20   * buckets in the table changes.
   14.21   */
   14.22  
   14.23 -/*==========================================================================*/
   14.24 -/** Number of bits in half a word. */
   14.25 -//#if __WORDSIZE == 64
   14.26 -//#define HALF_WORD_BITS 32
   14.27 -//#else
   14.28 -#define HALF_WORD_BITS 16
   14.29 -//#endif
   14.30 -
   14.31 -/** Mask for lo half of a word. On 32-bit this is 
   14.32 - * (1<<16) - 1 = 65535 = 0xffff
   14.33 - * It's 4294967295 = 0xffffffff on 64-bit.
   14.34 - */
   14.35 -#define LO_HALF_MASK ((1 << HALF_WORD_BITS) - 1)
   14.36 -
   14.37 -/** Get the lo half of a word. */
   14.38 -#define LO_HALF(x) ((x) & LO_HALF_MASK)
   14.39 -
   14.40 -/** Get the hi half of a word. */
   14.41 -#define HI_HALF(x) ((x) >> HALF_WORD_BITS)
   14.42 -
   14.43 -/** Do a full hash on both inputs, using DES-style non-linear scrambling.
   14.44 - * Both inputs are replaced with the results of the hash.
   14.45 - *
   14.46 - * @param pleft input/output word
   14.47 - * @param pright input/output word
   14.48 - */
   14.49 -void pseudo_des(unsigned long *pleft, unsigned long *pright){
   14.50 -    // Bit-rich mixing constant.
   14.51 -    static const unsigned long a_mixer[] = {
   14.52 -        0xbaa96887L, 0x1e17d32cL, 0x03bcdc3cL, 0x0f33d1b2L, };
   14.53 +/*============================================================================*/
   14.54 +/*
   14.55 +--------------------------------------------------------------------
   14.56 +lookup2.c, by Bob Jenkins, December 1996, Public Domain.
   14.57 +You can use this free for any purpose.  It has no warranty.
   14.58 +--------------------------------------------------------------------
   14.59 +*/
   14.60  
   14.61 -    // Bit-rich mixing constant.
   14.62 -    static const unsigned long b_mixer[] = {
   14.63 -        0x4b0f3b58L, 0xe874f0c3L, 0x6955c5a6L, 0x55a7ca46L, };
   14.64 -
   14.65 -    // Number of iterations - must be 2 or 4.
   14.66 -    static const int ncycle = 4;
   14.67 -    //static const int ncycle = 2;
   14.68 -
   14.69 -    unsigned long left = *pleft, right = *pright;
   14.70 -    unsigned long v, v_hi, v_lo;
   14.71 -    int i;
   14.72 +#define hashsize(n) ((ub4)1<<(n))
   14.73 +#define hashmask(n) (hashsize(n)-1)
   14.74  
   14.75 -    for(i=0; i<ncycle; i++){
   14.76 -        // Flip some bits in right to get v.
   14.77 -        v = right;
   14.78 -        v ^= a_mixer[i];
   14.79 -        // Get lo and hi halves of v.
   14.80 -        v_lo = LO_HALF(v);
   14.81 -        v_hi = HI_HALF(v);
   14.82 -        // Non-linear mix of the halves of v.
   14.83 -        v = ((v_lo * v_lo) + ~(v_hi * v_hi));
   14.84 -        // Swap the halves of v.
   14.85 -        v = (HI_HALF(v) | (LO_HALF(v) << HALF_WORD_BITS));
   14.86 -        // Flip some bits.
   14.87 -        v ^= b_mixer[i];
   14.88 -        // More non-linear mixing.
   14.89 -        v += (v_lo * v_hi);
   14.90 -        v ^= left;
   14.91 -        left = right;
   14.92 -        right = v;
   14.93 -    }
   14.94 -    *pleft = left;
   14.95 -    *pright = right;
   14.96 +/*
   14.97 +--------------------------------------------------------------------
   14.98 +mix -- mix 3 32-bit values reversibly.
   14.99 +For every delta with one or two bits set, and the deltas of all three
  14.100 +  high bits or all three low bits, whether the original value of a,b,c
  14.101 +  is almost all zero or is uniformly distributed,
  14.102 +* If mix() is run forward or backward, at least 32 bits in a,b,c
  14.103 +  have at least 1/4 probability of changing.
  14.104 +* If mix() is run forward, every bit of c will change between 1/3 and
  14.105 +  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
  14.106 +mix() was built out of 36 single-cycle latency instructions in a 
  14.107 +  structure that could support 2x parallelism, like so:
  14.108 +      a -= b; 
  14.109 +      a -= c; x = (c>>13);
  14.110 +      b -= c; a ^= x;
  14.111 +      b -= a; x = (a<<8);
  14.112 +      c -= a; b ^= x;
  14.113 +      c -= b; x = (b>>13);
  14.114 +      ...
  14.115 +  Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
  14.116 +  of that parallelism.  They've also turned some of those single-cycle
  14.117 +  latency instructions into multi-cycle latency instructions.  Still,
  14.118 +  this is the fastest good hash I could find.  There were about 2^^68
  14.119 +  to choose from.  I only looked at a billion or so.
  14.120 +--------------------------------------------------------------------
  14.121 +*/
  14.122 +#define mix(a,b,c) \
  14.123 +{ \
  14.124 +  a -= b; a -= c; a ^= (c>>13); \
  14.125 +  b -= c; b -= a; b ^= (a<<8); \
  14.126 +  c -= a; c -= b; c ^= (b>>13); \
  14.127 +  a -= b; a -= c; a ^= (c>>12);  \
  14.128 +  b -= c; b -= a; b ^= (a<<16); \
  14.129 +  c -= a; c -= b; c ^= (b>>5); \
  14.130 +  a -= b; a -= c; a ^= (c>>3);  \
  14.131 +  b -= c; b -= a; b ^= (a<<10); \
  14.132 +  c -= a; c -= b; c ^= (b>>15); \
  14.133  }
  14.134  
  14.135 -/** Hash a string.
  14.136 - *
  14.137 - * @param s input to hash
  14.138 - * @return hashcode
  14.139 - */
  14.140 -Hashcode hash_string(char *s){
  14.141 -    Hashcode h = 0;
  14.142 -    if(s){
  14.143 -        for( ; *s; s++){
  14.144 -            h = hash_2ul(h, *s);
  14.145 -        }
  14.146 -    }
  14.147 -    return h;
  14.148 +/*
  14.149 +--------------------------------------------------------------------
  14.150 +hash() -- hash a variable-length key into a 32-bit value
  14.151 +  k     : the key (the unaligned variable-length array of bytes)
  14.152 +  len   : the length of the key, counting by bytes
  14.153 +  level : can be any 4-byte value
  14.154 +Returns a 32-bit value.  Every bit of the key affects every bit of
  14.155 +the return value.  Every 1-bit and 2-bit delta achieves avalanche.
  14.156 +About 36+6len instructions.
  14.157 +
  14.158 +The best hash table sizes are powers of 2.  There is no need to do
  14.159 +mod a prime (mod is sooo slow!).  If you need less than 32 bits,
  14.160 +use a bitmask.  For example, if you need only 10 bits, do
  14.161 +  h = (h & hashmask(10));
  14.162 +In which case, the hash table should have hashsize(10) elements.
  14.163 +
  14.164 +If you are hashing n strings (ub1 **)k, do it like this:
  14.165 +  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
  14.166 +
  14.167 +By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
  14.168 +code any way you wish, private, educational, or commercial.  It's free.
  14.169 +
  14.170 +See http://burtleburtle.net/bob/hash/evahash.html
  14.171 +Use for hash table lookup, or anything where one collision in 2^32 is
  14.172 +acceptable.  Do NOT use for cryptographic purposes.
  14.173 +--------------------------------------------------------------------
  14.174 +*/
  14.175 +
  14.176 +ub4 hash(const ub1 *k, ub4 length, ub4 initval)
  14.177 +//register ub1 *k;        /* the key */
  14.178 +//register ub4  length;   /* the length of the key */
  14.179 +//register ub4  initval;    /* the previous hash, or an arbitrary value */
  14.180 +{
  14.181 +    /*register*/ ub4 a,b,c,len;
  14.182 +
  14.183 +   /* Set up the internal state */
  14.184 +   len = length;
  14.185 +   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
  14.186 +   c = initval;           /* the previous hash value */
  14.187 +
  14.188 +   /*---------------------------------------- handle most of the key */
  14.189 +   while (len >= 12)
  14.190 +   {
  14.191 +      a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
  14.192 +      b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
  14.193 +      c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
  14.194 +      mix(a,b,c);
  14.195 +      k += 12; len -= 12;
  14.196 +   }
  14.197 +
  14.198 +   /*------------------------------------- handle the last 11 bytes */
  14.199 +   c += length;
  14.200 +   switch(len)              /* all the case statements fall through */
  14.201 +   {
  14.202 +   case 11: c+=((ub4)k[10]<<24);
  14.203 +   case 10: c+=((ub4)k[9]<<16);
  14.204 +   case 9 : c+=((ub4)k[8]<<8);
  14.205 +      /* the first byte of c is reserved for the length */
  14.206 +   case 8 : b+=((ub4)k[7]<<24);
  14.207 +   case 7 : b+=((ub4)k[6]<<16);
  14.208 +   case 6 : b+=((ub4)k[5]<<8);
  14.209 +   case 5 : b+=k[4];
  14.210 +   case 4 : a+=((ub4)k[3]<<24);
  14.211 +   case 3 : a+=((ub4)k[2]<<16);
  14.212 +   case 2 : a+=((ub4)k[1]<<8);
  14.213 +   case 1 : a+=k[0];
  14.214 +     /* case 0: nothing left to add */
  14.215 +   }
  14.216 +   mix(a,b,c);
  14.217 +   /*-------------------------------------------- report the result */
  14.218 +   return c;
  14.219  }
  14.220 +/*============================================================================*/
  14.221  
  14.222  /** Get the bucket for a hashcode in a hash table.
  14.223   *
  14.224 @@ -132,28 +173,22 @@ inline HTBucket * get_bucket(HashTable *
  14.225  }
  14.226  
  14.227  /** Initialize a hash table.
  14.228 - * Can be safely called more than once.
  14.229   *
  14.230   * @param table to initialize
  14.231   */
  14.232 -void HashTable_init(HashTable *table){
  14.233 +static void HashTable_init(HashTable *table){
  14.234      int i;
  14.235  
  14.236 -    if(!table->init_done){
  14.237 -        table->init_done = 1;
  14.238 -        table->next_id = 0;
  14.239 -        for(i=0; i<table->buckets_n; i++){
  14.240 -            HTBucket *bucket = get_bucket(table, i);
  14.241 -            bucket->head = 0;
  14.242 -            bucket->count = 0;
  14.243 -        }
  14.244 -        table->entry_count = 0;
  14.245 +    for(i = 0; i < table->buckets_n; i++){
  14.246 +        HTBucket *bucket = get_bucket(table, i);
  14.247 +        bucket->head = NULL;
  14.248 +        bucket->count = 0;
  14.249      }
  14.250 +    table->entry_count = 0;
  14.251  }
  14.252  
  14.253  /** Allocate a new hashtable.
  14.254   * If the number of buckets is not positive the default is used.
  14.255 - * The number of buckets should usually be prime.
  14.256   *
  14.257   * @param buckets_n number of buckets
  14.258   * @return new hashtable or null
  14.259 @@ -167,7 +202,7 @@ HashTable *HashTable_new(int buckets_n){
  14.260      z->buckets = (HTBucket*)allocate(buckets_n * sizeof(HTBucket));
  14.261      if(!z->buckets){
  14.262          deallocate(z);
  14.263 -        z = 0;
  14.264 +        z = NULL;
  14.265          goto exit;
  14.266      }
  14.267      z->buckets_n = buckets_n;
  14.268 @@ -233,7 +268,7 @@ int HashTable_set_buckets_n(HashTable *t
  14.269          goto exit;
  14.270      }
  14.271      table->buckets_n = buckets_n;
  14.272 -    for(i=0; i<old_buckets_n; i++){
  14.273 +    for(i=0; i < old_buckets_n; i++){
  14.274          HTBucket *bucket = old_buckets + i;
  14.275          HTEntry *entry, *next;
  14.276          for(entry = bucket->head; entry; entry = next){
  14.277 @@ -305,7 +340,7 @@ inline void HTEntry_free(HTEntry *z){
  14.278   * @param entry to free
  14.279   */
  14.280  inline void HashTable_free_entry(HashTable *table, HTEntry *entry){
  14.281 -    if(!entry)return;
  14.282 +    if(!entry) return;
  14.283      if(table && table->entry_free_fn){
  14.284          table->entry_free_fn(table, entry);
  14.285      } else {
  14.286 @@ -325,7 +360,7 @@ inline void HashTable_free_entry(HashTab
  14.287  inline HTEntry * HashTable_find_entry(HashTable *table, Hashcode hashcode,
  14.288  				      TableTestFn *test_fn, TableArg arg){
  14.289      HTBucket *bucket;
  14.290 -    HTEntry *entry = 0;
  14.291 +    HTEntry *entry = NULL;
  14.292      HTEntry *next;
  14.293  
  14.294      bucket = get_bucket(table, hashcode);
  14.295 @@ -346,7 +381,7 @@ inline HTEntry * HashTable_find_entry(Ha
  14.296   * @return 1 if equal, 0 otherwise
  14.297   */
  14.298  inline int HashTable_key_equal(HashTable *table, void *key1, void *key2){
  14.299 -    return (table->key_equal_fn ? table->key_equal_fn(key1, key2) : key1==key2);
  14.300 +    return (table->key_equal_fn ? table->key_equal_fn(key1, key2) : key1 == key2);
  14.301  }
  14.302  
  14.303  /** Compute the hashcode of a hashtable key.
  14.304 @@ -358,7 +393,9 @@ inline int HashTable_key_equal(HashTable
  14.305   * @return hashcode
  14.306   */
  14.307  inline Hashcode HashTable_key_hash(HashTable *table, void *key){
  14.308 -    return (table->key_hash_fn ? table->key_hash_fn(key) : hash_ul((unsigned long)key));
  14.309 +    return (table->key_hash_fn 
  14.310 +            ? table->key_hash_fn(key)
  14.311 +            : hash_hvoid(0, &key, sizeof(key)));
  14.312  }
  14.313  
  14.314  /** Test if an entry has a given key.
  14.315 @@ -378,16 +415,10 @@ static inline int has_key(TableArg arg, 
  14.316   * @param key to look for
  14.317   * @return entry if found, null otherwise
  14.318   */
  14.319 -#if 0
  14.320 -inline HTEntry * HashTable_get_entry(HashTable *table, void *key){
  14.321 -    TableArg arg = { ptr: key };
  14.322 -    return HashTable_find_entry(table, HashTable_key_hash(table, key), has_key, arg);
  14.323 -}
  14.324 -#else
  14.325  inline HTEntry * HashTable_get_entry(HashTable *table, void *key){
  14.326      Hashcode hashcode;
  14.327      HTBucket *bucket;
  14.328 -    HTEntry *entry = 0;
  14.329 +    HTEntry *entry = NULL;
  14.330      HTEntry *next;
  14.331  
  14.332      hashcode = HashTable_key_hash(table, key);
  14.333 @@ -400,7 +431,6 @@ inline HTEntry * HashTable_get_entry(Has
  14.334      }
  14.335      return entry;
  14.336  }
  14.337 -#endif
  14.338  
  14.339  /** Get the value of an entry with a given key.
  14.340   *
  14.341 @@ -420,7 +450,7 @@ inline void * HashTable_get(HashTable *t
  14.342  void show_buckets(HashTable *table, IOStream *io){
  14.343      int i,j ;
  14.344      IOStream_print(io, "entry_count=%d buckets_n=%d\n", table->entry_count, table->buckets_n);
  14.345 -    for(i=0; i<table->buckets_n; i++){
  14.346 +    for(i=0; i < table->buckets_n; i++){
  14.347          if(0 || table->buckets[i].count>0){
  14.348              IOStream_print(io, "bucket %3d %3d %10p ", i,
  14.349                          table->buckets[i].count,
  14.350 @@ -442,10 +472,9 @@ void show_buckets(HashTable *table, IOSt
  14.351   */
  14.352  static int print_entry(TableArg arg, HashTable *table, HTEntry *entry){
  14.353      IOStream *io = (IOStream*)arg.ptr;
  14.354 -    IOStream_print(io, " b=%4lx h=%08lx i=%08lx |-> e=%8p k=%8p v=%8p\n",
  14.355 +    IOStream_print(io, " b=%4lx h=%08lx |-> e=%8p k=%8p v=%8p\n",
  14.356                  entry->hashcode % table->buckets_n,
  14.357                  entry->hashcode,
  14.358 -                entry->index,
  14.359                  entry, entry->key, entry->value);
  14.360      return 0;
  14.361  }
  14.362 @@ -461,21 +490,6 @@ void HashTable_print(HashTable *table, I
  14.363  }
  14.364  /*==========================================================================*/
  14.365  
  14.366 -/** Get the next entry id to use for a table.
  14.367 - *
  14.368 - * @param table hash table
  14.369 - * @return non-zero entry id
  14.370 - */
  14.371 -static inline unsigned long get_next_id(HashTable *table){
  14.372 -    unsigned long id;
  14.373 -
  14.374 -    if(table->next_id == 0){
  14.375 -        table->next_id = 1;
  14.376 -    }
  14.377 -    id = table->next_id++;
  14.378 -    return id;
  14.379 -}
  14.380 -
  14.381  /** Add an entry to the bucket for the
  14.382   * given hashcode.
  14.383   *
  14.384 @@ -488,7 +502,6 @@ static inline unsigned long get_next_id(
  14.385  inline HTEntry * HashTable_add_entry(HashTable *table, Hashcode hashcode, void *key, void *value){
  14.386      HTEntry *entry = HTEntry_new(hashcode, key, value);
  14.387      if(entry){
  14.388 -        entry->index = get_next_id(table);
  14.389          push_on_bucket(table, hashcode, entry);
  14.390          table->entry_count++;
  14.391      }
  14.392 @@ -537,7 +550,6 @@ inline HTEntry * HashTable_add(HashTable
  14.393      return HashTable_add_entry(table, HashTable_key_hash(table, key), key, value);
  14.394  }
  14.395  
  14.396 -
  14.397  /** Remove entries satisfying a test from the bucket for the
  14.398   * given hashcode. 
  14.399   *
  14.400 @@ -550,7 +562,7 @@ inline HTEntry * HashTable_add(HashTable
  14.401  inline int HashTable_remove_entry(HashTable *table, Hashcode hashcode,
  14.402  				  TableTestFn *test_fn, TableArg arg){
  14.403      HTBucket *bucket;
  14.404 -    HTEntry *entry, *prev = 0, *next;
  14.405 +    HTEntry *entry, *prev = NULL, *next;
  14.406      int removed_count = 0;
  14.407  
  14.408      bucket = get_bucket(table, hashcode);
  14.409 @@ -566,7 +578,7 @@ inline int HashTable_remove_entry(HashTa
  14.410              table->entry_count--;
  14.411              removed_count++;
  14.412              HashTable_free_entry(table, entry);
  14.413 -            entry = 0;
  14.414 +            entry = NULL;
  14.415          }
  14.416          prev = entry;
  14.417      }
  14.418 @@ -580,10 +592,9 @@ inline int HashTable_remove_entry(HashTa
  14.419   * @return number of entries removed
  14.420   */
  14.421  inline int HashTable_remove(HashTable *table, void *key){
  14.422 -#if 1
  14.423      Hashcode hashcode;
  14.424      HTBucket *bucket;
  14.425 -    HTEntry *entry, *prev = 0, *next;
  14.426 +    HTEntry *entry, *prev = NULL, *next;
  14.427      int removed_count = 0;
  14.428  
  14.429      hashcode = HashTable_key_hash(table, key);
  14.430 @@ -600,15 +611,11 @@ inline int HashTable_remove(HashTable *t
  14.431              table->entry_count--;
  14.432              removed_count++;
  14.433              HashTable_free_entry(table, entry);
  14.434 -            entry = 0;
  14.435 +            entry = NULL;
  14.436          }
  14.437          prev = entry;
  14.438      }
  14.439      return removed_count;
  14.440 -#else
  14.441 -    return HashTable_remove_entry(table, HashTable_key_hash(table, key),
  14.442 -				  has_key, (TableArg){ ptr: key});
  14.443 -#endif
  14.444  }
  14.445  
  14.446  /** Remove (and free) all the entries in a bucket.
  14.447 @@ -622,7 +629,7 @@ static inline void bucket_clear(HashTabl
  14.448          next = entry->next;
  14.449          HashTable_free_entry(table, entry);
  14.450      }
  14.451 -    bucket->head = 0;
  14.452 +    bucket->head = NULL;
  14.453      table->entry_count -= bucket->count;
  14.454      bucket->count = 0;
  14.455  }
  14.456 @@ -634,7 +641,7 @@ static inline void bucket_clear(HashTabl
  14.457  void HashTable_clear(HashTable *table){
  14.458      int i, n = table->buckets_n;
  14.459  
  14.460 -    for(i=0; i<n; i++){
  14.461 +    for(i = 0; i < n; i++){
  14.462          bucket_clear(table, table->buckets + i);
  14.463      }
  14.464  }
    15.1 --- a/tools/vnet/libxutil/hash_table.h	Thu Feb 09 16:09:00 2006 +0100
    15.2 +++ b/tools/vnet/libxutil/hash_table.h	Thu Feb 09 16:12:11 2006 +0100
    15.3 @@ -1,5 +1,5 @@
    15.4  /*
    15.5 - * Copyright (C) 2001 - 2004 Mike Wray <mike.wray@hp.com>
    15.6 + * Copyright (C) 2001 - 2005 Mike Wray <mike.wray@hp.com>
    15.7   *
    15.8   * This library is free software; you can redistribute it and/or modify
    15.9   * it under the terms of the GNU Lesser General Public License as published by
   15.10 @@ -20,6 +20,7 @@
   15.11  #define _XUTIL_HASH_TABLE_H_
   15.12  
   15.13  #include "iostream.h"
   15.14 +#include "sys_string.h"
   15.15  
   15.16  typedef unsigned long Hashcode;
   15.17  
   15.18 @@ -33,8 +34,6 @@ typedef union TableArg {
   15.19  typedef struct HTEntry {
   15.20      /** Hashcode of the entry's key. */
   15.21      Hashcode hashcode;
   15.22 -    /** Identifier for this entry in the table. */
   15.23 -    int index;
   15.24      /** The key for this entry. */
   15.25      void *key;
   15.26      /** The value in this entry. */
   15.27 @@ -53,8 +52,8 @@ typedef struct HTBucket {
   15.28  
   15.29  /** Default number of buckets in a hash table.
   15.30   * You want enough buckets so the lists in the buckets will typically be short.
   15.31 - * It's a good idea if this is prime, since that will help to spread hashcodes
   15.32 - * around the table.
   15.33 + * If the hash function is good it doesn't matter whether the number of
   15.34 + * buckets is prime or not.
   15.35   */
   15.36  //#define HT_BUCKETS_N 1
   15.37  //#define HT_BUCKETS_N 3
   15.38 @@ -91,14 +90,10 @@ typedef int TableOrderFn(HTEntry *e1, HT
   15.39   * These all default to 0, when default behaviour treating keys as integers is used.
   15.40   */
   15.41  struct HashTable {
   15.42 -    /** Flag indicating whether the table has been initialised. */
   15.43 -    int init_done;
   15.44 -    /** Next value for the id field in inserted rules. */
   15.45 -    unsigned long next_id;
   15.46 +    /** Array of buckets, each with its own list. */
   15.47 +    HTBucket *buckets;
   15.48      /** Number of buckets in the bucket array. */
   15.49      int buckets_n;
   15.50 -    /** Array of buckets, each with its own list. */
   15.51 -    HTBucket *buckets;
   15.52      /** Number of entries in the table. */
   15.53      int entry_count;
   15.54      /** Function to free keys and values in entries. */
   15.55 @@ -126,85 +121,35 @@ extern HTEntry * HashTable_find_entry(Ha
   15.56                                        TableTestFn *test_fn, TableArg arg);
   15.57  extern int HashTable_remove_entry(HashTable *table, Hashcode hashcode,
   15.58                                     TableTestFn *test_fn, TableArg arg);
   15.59 -//extern int HashTable_map(HashTable *table, TableMapFn *map_fn, TableArg arg);
   15.60  extern void HashTable_print(HashTable *table, IOStream *out);
   15.61  extern int HashTable_set_buckets_n(HashTable *table, int buckets_n);
   15.62  extern int HashTable_adjust(HashTable *table, int buckets_min);
   15.63 -extern void pseudo_des(unsigned long *pleft, unsigned long *pright);
   15.64 -extern Hashcode hash_string(char *s);
   15.65  
   15.66  extern int HashTable_order_bucket(HashTable *table, Hashcode hashcode, TableOrderFn *order);
   15.67  
   15.68 -/** Control whether to use hashing based on DES or simple
   15.69 - * hashing. DES hashing is `more random' but much more expensive.
   15.70 - */
   15.71 -#define HASH_PSEUDO_DES 0
   15.72 +typedef unsigned long ub4;
   15.73 +typedef unsigned char ub1;
   15.74  
   15.75 -/** Hash a long using a quick and dirty linear congruential random number generator.
   15.76 - *  See `Numerical Recipes in C', Chapter 7, "An Even Quicker Generator".
   15.77 +extern ub4 hash(const ub1 *k, ub4 length, ub4 initval);
   15.78 +
   15.79 +/** Hash some bytes starting with a given hashcode.
   15.80   *
   15.81 - * @param a value to hash
   15.82 - * @return hashed input
   15.83 - */
   15.84 -static inline unsigned long lcrng_hash(unsigned long a){
   15.85 -    return (1664525L * a + 1013904223L);
   15.86 -}
   15.87 -
   15.88 -/** Hash an unsigned long.
   15.89 - *
   15.90 - * @param a input to hash
   15.91 + * @param h initial hashcode - use 0, a previous hash, or an arbitrary value
   15.92 + * @param b bytes to hash
   15.93 + * @param b_n number of bytes to hash
   15.94   * @return hashcode
   15.95   */
   15.96 -static inline Hashcode hash_ul(unsigned long a){
   15.97 -#if HASH_PSEUDO_DES
   15.98 -    unsigned long left = a;
   15.99 -    unsigned long right = 0L;
  15.100 -    pseudo_des(&left, &right);
  15.101 -    return right;
  15.102 -#else
  15.103 -    a = lcrng_hash(a);
  15.104 -    a = lcrng_hash(a);
  15.105 -    return a;
  15.106 -#endif
  15.107 +static inline Hashcode hash_hvoid(Hashcode h, const void *b, unsigned b_n){
  15.108 +    return hash(b, b_n, h);
  15.109  }
  15.110  
  15.111 -/** Hash two unsigned longs together.
  15.112 +/** Hash a string (null-terminated).
  15.113   *
  15.114 - * @param a input to hash
  15.115 - * @param b input to hash
  15.116 + * @param s input to hash
  15.117   * @return hashcode
  15.118   */
  15.119 -static inline Hashcode hash_2ul(unsigned long a, unsigned long b){
  15.120 -#if HASH_PSEUDO_DES
  15.121 -    unsigned long left = a;
  15.122 -    unsigned long right = b;
  15.123 -    pseudo_des(&left, &right);
  15.124 -    return right;
  15.125 -#else
  15.126 -    a = lcrng_hash(a);
  15.127 -    a ^= b;
  15.128 -    a = lcrng_hash(a);
  15.129 -    return a;
  15.130 -#endif
  15.131 -}
  15.132 -
  15.133 -/** Hash a hashcode and an unsigned long together.
  15.134 - *
  15.135 - * @param a input hashcode
  15.136 - * @param b input to hash
  15.137 - * @return hashcode
  15.138 - */
  15.139 -static inline Hashcode hash_hul(Hashcode a, unsigned long b){
  15.140 -#if HASH_PSEUDO_DES
  15.141 -    unsigned long left = a;
  15.142 -    unsigned long right = b;
  15.143 -    pseudo_des(&left, &right);
  15.144 -    return right;
  15.145 -#else
  15.146 -    a ^= b;
  15.147 -    a = lcrng_hash(a);
  15.148 -    return a;
  15.149 -#endif
  15.150 +static inline Hashcode hash_string(char *s){
  15.151 +    return (s ? hash_hvoid(0, s, strlen(s)) : 0);
  15.152  }
  15.153  
  15.154  /** Macro to declare variables for HashTable_for_each() to use.
    16.1 --- a/tools/vnet/libxutil/mem_stream.c	Thu Feb 09 16:09:00 2006 +0100
    16.2 +++ b/tools/vnet/libxutil/mem_stream.c	Thu Feb 09 16:12:11 2006 +0100
    16.3 @@ -183,6 +183,8 @@ static void mem_put(MemData *data, const
    16.4  static int mem_expand(MemData *data, size_t extra){
    16.5      int err = -ENOMEM;
    16.6      int delta = (extra < delta_min ? delta_min : extra);
    16.7 +    int buf_n;
    16.8 +    char *buf;
    16.9      if(data->buf_max > 0){
   16.10          int delta_max = data->buf_max - data->buf_n;
   16.11          if(delta > delta_max){
   16.12 @@ -190,8 +192,8 @@ static int mem_expand(MemData *data, siz
   16.13              if(delta > delta_max) goto exit;
   16.14          }
   16.15      }
   16.16 -    int buf_n = data->buf_n + delta;
   16.17 -    char *buf = allocate(buf_n);
   16.18 +    buf_n = data->buf_n + delta;
   16.19 +    buf = allocate(buf_n);
   16.20      if(!buf) goto exit;
   16.21      mem_get(data, buf, mem_len(data));
   16.22      data->hi = mem_len(data);
   16.23 @@ -218,9 +220,10 @@ static int mem_expand(MemData *data, siz
   16.24   * @return number of bytes written on success, negative error code otherwise
   16.25   */
   16.26  static int mem_write(IOStream *io, const void *msg, size_t n){
   16.27 +    int room;
   16.28      MemData *data = get_mem_data(io);
   16.29      if(data->err) return -data->err;
   16.30 -    int room = mem_room(data);
   16.31 +    room = mem_room(data);
   16.32      if(n > room){
   16.33          int err = mem_expand(data, n - room);
   16.34          if(err) return err;
   16.35 @@ -238,9 +241,10 @@ static int mem_write(IOStream *io, const
   16.36   * @return number of bytes read on success, negative error code otherwise
   16.37   */
   16.38  static int mem_read(IOStream *io, void *buf, size_t n){
   16.39 +    int k;
   16.40      MemData *data = get_mem_data(io);
   16.41      if(data->err) return -data->err;
   16.42 -    int k = mem_len(data);
   16.43 +    k = mem_len(data);
   16.44      if(n > k){
   16.45          n = k;
   16.46      }
   16.47 @@ -292,8 +296,9 @@ static void mem_free(IOStream *io){
   16.48  IOStream *mem_stream_new_size(size_t buf_n, size_t buf_max){
   16.49      int err = -ENOMEM;
   16.50      MemData *data = ALLOCATE(MemData);
   16.51 +    IOStream *io = NULL;
   16.52      if(!data) goto exit;
   16.53 -    IOStream *io = ALLOCATE(IOStream);
   16.54 +    io = ALLOCATE(IOStream);
   16.55      if(!io) goto exit;
   16.56      if(buf_n <= delta_min){
   16.57          buf_n = delta_min;
    17.1 --- a/tools/vnet/libxutil/sxpr.h	Thu Feb 09 16:09:00 2006 +0100
    17.2 +++ b/tools/vnet/libxutil/sxpr.h	Thu Feb 09 16:12:11 2006 +0100
    17.3 @@ -149,7 +149,7 @@ static inline int eq(Sxpr x, Sxpr y){
    17.4  
    17.5  typedef struct ObjString {
    17.6      int len;
    17.7 -    char data[];
    17.8 +    char data[0];
    17.9  } ObjString;
   17.10  
   17.11  /** An atom. */
   17.12 @@ -318,7 +318,6 @@ typedef struct SxprType {
   17.13      ObjCopyFn *copy;
   17.14  } SxprType;
   17.15  
   17.16 -
   17.17  extern int def_sxpr_type(SxprType *tydef);
   17.18  extern SxprType *get_sxpr_type(int ty);
   17.19  
    18.1 --- a/tools/vnet/libxutil/sxpr_parser.c	Thu Feb 09 16:09:00 2006 +0100
    18.2 +++ b/tools/vnet/libxutil/sxpr_parser.c	Thu Feb 09 16:12:11 2006 +0100
    18.3 @@ -310,6 +310,7 @@ int ParserState_new(ParserStateFn *fn, c
    18.4  void Parser_pop(Parser *p){
    18.5      ParserState *s = p->state;
    18.6      if(!s) return;
    18.7 +    dprintf("Parser_pop> %s\n", s->name);
    18.8      p->state = s->parent;
    18.9      if (p->start_state == s) {
   18.10          p->start_state = NULL;
   18.11 @@ -336,6 +337,7 @@ void Parser_free(Parser *z){
   18.12  }
   18.13  
   18.14  int Parser_push(Parser *p, ParserStateFn *fn, char *name){
   18.15 +    dprintf("Parser_push> %s\n", name);
   18.16      return ParserState_new(fn, name, p->state, &p->state);
   18.17  }
   18.18          
   18.19 @@ -522,7 +524,7 @@ int Parser_ready(Parser *p){
   18.20  }
   18.21  
   18.22  Sxpr Parser_get_val(Parser *p){
   18.23 -    Sxpr v = ONONE;
   18.24 +    Sxpr v = ONONE, w = ONONE;
   18.25      if(CONSP(p->val)){
   18.26      } else if (p->start_state && CONSP(p->start_state->val)){
   18.27          p->val = p->start_state->val;
   18.28 @@ -531,7 +533,7 @@ Sxpr Parser_get_val(Parser *p){
   18.29      }  else {
   18.30          goto exit;
   18.31      }
   18.32 -    Sxpr w = p->val;
   18.33 +    w = p->val;
   18.34      v = CAR(w);
   18.35      p->val = CDR(w);
   18.36      hfree(w);
   18.37 @@ -940,11 +942,13 @@ int Parser_input_eof(Parser *p){
   18.38  int Parser_input(Parser *p, char *buf, int buf_n){
   18.39      int err = 0;
   18.40      int i = 0;
   18.41 -    dprintf("> |%s|\n", buf);
   18.42 +    dprintf("> buf_n=%d\n", buf_n);
   18.43      if(buf_n <= 0){
   18.44 +        buf_n = 0;
   18.45          err = Parser_input_eof(p);
   18.46          goto exit;
   18.47      }
   18.48 +    dprintf("> buf=|%*s|\n", buf_n, buf);
   18.49      for(i = 0; i < buf_n; i++){
   18.50          err = Parser_input_char(p, buf[i]);
   18.51          if(err) goto exit;
    19.1 --- a/tools/vnet/libxutil/sys_net.c	Thu Feb 09 16:09:00 2006 +0100
    19.2 +++ b/tools/vnet/libxutil/sys_net.c	Thu Feb 09 16:12:11 2006 +0100
    19.3 @@ -182,7 +182,7 @@ int get_host_address(const char *name, u
    19.4  #else
    19.5      struct hostent *host = gethostbyname(name);
    19.6      if(!host){
    19.7 -        return -EINVAL;
    19.8 +        return -ENOENT;
    19.9      }
   19.10      *address = ((struct in_addr *)(host->h_addr))->s_addr;
   19.11      return 0;
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/tools/vnet/scripts/Makefile	Thu Feb 09 16:12:11 2006 +0100
    20.3 @@ -0,0 +1,18 @@
    20.4 +# -*- mode: Makefile; -*-
    20.5 +#============================================================================
    20.6 +
    20.7 +INSTALL		= install
    20.8 +INSTALL_PROG	= $(INSTALL) -m0755
    20.9 +INSTALL_DIR	= $(INSTALL) -d -m0755
   20.10 +
   20.11 +SBIN_DIR        = $(DESTDIR)/usr/sbin
   20.12 +
   20.13 +.PHONY: all install clean
   20.14 +
   20.15 +all:
   20.16 +
   20.17 +install:
   20.18 +	$(INSTALL_DIR) $(SBIN_DIR)
   20.19 +	$(INSTALL_PROG) vn $(SBIN_DIR)
   20.20 +
   20.21 +clean:
   20.22 \ No newline at end of file
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/tools/vnet/scripts/vn	Thu Feb 09 16:12:11 2006 +0100
    21.3 @@ -0,0 +1,904 @@
    21.4 +#!/usr/bin/env python2.4
    21.5 +#  -*- mode: python; -*-
    21.6 +#============================================================================
    21.7 +# Copyright (C) 2005, 2006 Mike Wray <mike.wray@hp.com>
    21.8 +#
    21.9 +# This library is free software; you can redistribute it and/or modify
   21.10 +# it under the terms of the GNU Lesser General Public License as published by
   21.11 +# the Free Software Foundation; either version 2.1 of the License, or
   21.12 +# (at your option) any later version.
   21.13 +#
   21.14 +# This library is distributed in the hope that it will be useful,
   21.15 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
   21.16 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   21.17 +# GNU Lesser General Public License for more details.
   21.18 +#
   21.19 +# You should have received a copy of the GNU Lesser General Public License
   21.20 +# along with this library; if not, write to the Free Software
   21.21 +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   21.22 +#============================================================================
   21.23 +
   21.24 +# Vnet (network virtualization) control utility.
   21.25 +
   21.26 +import os
   21.27 +import os.path
   21.28 +import re
   21.29 +import socket
   21.30 +import sys
   21.31 +from getopt import getopt, GetoptError
   21.32 +
   21.33 +sys.path.append('/usr/lib/python')
   21.34 +sys.path.append('/usr/lib64/python')
   21.35 +
   21.36 +from xen.xend import sxp
   21.37 +from xen.xend.PrettyPrint import prettyprint
   21.38 +
   21.39 +# Path of unix-domain socket to vnetd.
   21.40 +VNETD_PATH = "/tmp/vnetd"
   21.41 +
   21.42 +def vnetd_running():
   21.43 +    return os.path.exists(VNETD_PATH)
   21.44 +
   21.45 +def vnetd_open():
   21.46 +    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
   21.47 +    sock.connect(VNETD_PATH)
   21.48 +    fi = sock.makefile('r', 0)
   21.49 +    fo = sock.makefile('w', 0)
   21.50 +    return (fi, fo)
   21.51 +
   21.52 +os.defpath += ':/sbin:/usr/sbin:/usr/local/sbin'
   21.53 +CMD_IFCONFIG = 'ifconfig'
   21.54 +CMD_BRCTL    = 'brctl'
   21.55 +
   21.56 +opts = None
   21.57 +
   21.58 +class Opts:
   21.59 +
   21.60 +    def __init__(self, **kwds):
   21.61 +        for (k, v) in kwds.items():
   21.62 +            setattr(self, k, v)
   21.63 +
   21.64 +opts = Opts(verbose=False, dryrun=False)
   21.65 +
   21.66 +def set_opts(val):
   21.67 +    global opts
   21.68 +    opts = val
   21.69 +    return opts
   21.70 +
   21.71 +def cmd(prog, *args):
   21.72 +    """Execute command 'prog' with 'args', optionally printing the command.
   21.73 +    """
   21.74 +    global opts
   21.75 +    command = " ".join([ prog ] + map(str, args))
   21.76 +    if opts.verbose:
   21.77 +        print command
   21.78 +    if not opts.dryrun:
   21.79 +        os.system(command)
   21.80 +
   21.81 +def vif_bridge_add(bridge, vif):
   21.82 +    """Add a network interface to a bridge.
   21.83 +    """
   21.84 +    cmd(CMD_BRCTL, 'addif', bridge, vif)
   21.85 +
   21.86 +def vif_bridge_rem(bridge, vif):
   21.87 +    """Remove a network interface from a bridge.
   21.88 +    """
   21.89 +    cmd(CMD_BRCTL, 'delif', bridge, vif)
   21.90 +
   21.91 +def bridge_create(bridge, **kwd):
   21.92 +    """Create a bridge.
   21.93 +    Defaults hello time to 0, forward delay to 0 and stp off.
   21.94 +    """
   21.95 +    cmd(CMD_BRCTL, 'addbr', bridge)
   21.96 +    if kwd.get('hello', None) is None:
   21.97 +        kwd['hello'] = 0
   21.98 +    if kwd.get('fd', None) is None:
   21.99 +        kwd['fd'] = 0
  21.100 +    if kwd.get('stp', None) is None:
  21.101 +        kwd['stp'] = 'off'
  21.102 +    bridge_set(bridge, **kwd)
  21.103 +    cmd(CMD_IFCONFIG, bridge, "up")
  21.104 +
  21.105 +def bridge_set(bridge, hello=None, fd=None, stp=None):
  21.106 +    """Set bridge parameters.
  21.107 +    """
  21.108 +    if hello is not None:
  21.109 +        cmd(CMD_BRCTL, 'sethello', bridge, hello)
  21.110 +    if fd is not None:
  21.111 +        cmd(CMD_BRCTL, 'setfd', bridge, fd)
  21.112 +    if stp is not None:
  21.113 +        cmd(CMD_BRCTL, 'stp', bridge, stp)
  21.114 +
  21.115 +def bridge_del(bridge):
  21.116 +    """Delete a bridge.
  21.117 +    """
  21.118 +    cmd(CMD_IFCONFIG, bridge, 'down')
  21.119 +    cmd(CMD_BRCTL, 'delbr', bridge)
  21.120 +
  21.121 +class Bridge:
  21.122 +    # Network interfaces are at /sys/class/net/*.
  21.123 +    # A bridge interface has ./bridge dir, ./brif is dir of bridged interfaces
  21.124 +    # (symlinks to the brport dirs).
  21.125 +    # If an interface is bridged ./brport is bridged port info,
  21.126 +    # brport/bridge is a symlink to the bridge.
  21.127 +
  21.128 +    INTERFACE_DIR = "/sys/class/net"
  21.129 +
  21.130 +    def isBridge(klass, dev):
  21.131 +        """Test if a network interface is a bridge.
  21.132 +        """
  21.133 +        devdir = os.path.join(klass.INTERFACE_DIR, dev)
  21.134 +        brdir = os.path.join(devdir, "bridge")
  21.135 +        try:
  21.136 +            os.stat(brdir)
  21.137 +            return True
  21.138 +        except:
  21.139 +            return False
  21.140 +
  21.141 +    isBridge = classmethod(isBridge)
  21.142 +
  21.143 +    def getInterfaces(klass):
  21.144 +        """Get a list of the network interfaces.
  21.145 +        """
  21.146 +        try:
  21.147 +            v = os.listdir(klass.INTERFACE_DIR)
  21.148 +            v.sort()
  21.149 +            return v
  21.150 +        except:
  21.151 +            return []
  21.152 +
  21.153 +    getInterfaces = classmethod(getInterfaces)
  21.154 +
  21.155 +    def getInterfaceAddr(klass, intf):
  21.156 +        intfdir = os.path.join(klass.INTERFACE_DIR, intf)
  21.157 +        addrfile = os.path.join(intfdir, "address")
  21.158 +        try:
  21.159 +            f = file(addrfile, "rb")
  21.160 +        except Exception, ex:
  21.161 +            #print ex
  21.162 +            return None
  21.163 +        try:
  21.164 +            return f.readline().strip()
  21.165 +        finally:
  21.166 +            f.close()
  21.167 +
  21.168 +    getInterfaceAddr = classmethod(getInterfaceAddr)
  21.169 +
  21.170 +    def getBridges(klass):
  21.171 +        """Get a list of the bridges.
  21.172 +        """
  21.173 +        return [ dev for dev in klass.getInterfaces() if klass.isBridge(dev) ]
  21.174 +
  21.175 +    getBridges = classmethod(getBridges)
  21.176 +
  21.177 +    def getBridgeInterfaces(klass, dev):
  21.178 +        """Get a list of the interfaces attached to a bridge.
  21.179 +        """
  21.180 +        devdir = os.path.join(klass.INTERFACE_DIR, dev)
  21.181 +        intfdir = os.path.join(devdir, "brif")
  21.182 +        try:
  21.183 +            v = os.listdir(intfdir)
  21.184 +            v.sort()
  21.185 +            return v
  21.186 +        except:
  21.187 +            return []
  21.188 +
  21.189 +    getBridgeInterfaces = classmethod(getBridgeInterfaces)
  21.190 +
  21.191 +    def getBridge(klass, dev):
  21.192 +        """Get the bridge an interface is attached to (if any).
  21.193 +        """
  21.194 +        devdir = os.path.join(klass.INTERFACE_DIR, dev)
  21.195 +        brfile = os.path.join(devdir, "brport/bridge")
  21.196 +        try:
  21.197 +            brpath = os.readlink(brfile)
  21.198 +            return os.path.basename(brpath)
  21.199 +        except:
  21.200 +            return None
  21.201 +
  21.202 +    getBridge = classmethod(getBridge)
  21.203 +
  21.204 +def vnet_cmd(expr):
  21.205 +    """Send a command expression to the vnet implementation.
  21.206 +    """
  21.207 +    if vnetd_running():
  21.208 +        (fi, fo) = vnetd_open()
  21.209 +    else:
  21.210 +        fi = None
  21.211 +        fo = file("/proc/vnet/policy", "wb")
  21.212 +    try:
  21.213 +        sxp.show(expr, fo)
  21.214 +        fo.flush()
  21.215 +    finally:
  21.216 +        if fi: fi.close()
  21.217 +        if fo: fo.close()
  21.218 +
  21.219 +def varp_flush():
  21.220 +    """Flush the varp cache.
  21.221 +    """
  21.222 +    expr = ['varp.flush']
  21.223 +    return vnet_cmd(expr)
  21.224 +
  21.225 +def vif_add(vnetid, vmac):
  21.226 +    """Tell the vnet implementation to add a vif to a vnet.
  21.227 +    """
  21.228 +    expr = ['vif.add', ['vnet', vnetid], ['vmac', vmac]]
  21.229 +    return vnet_cmd(expr)
  21.230 +
  21.231 +def vif_del(vnetid, vmac):
  21.232 +    """Tell the vnet implementation to delete a vif from a vnet.
  21.233 +    """
  21.234 +    expr = ['vif.del', ['vnet', vnetid], ['vmac', vmac]]
  21.235 +    return vnet_cmd(expr)
  21.236 +
  21.237 +def vnet_add(vnetid, vnetif=None, security=None):
  21.238 +    """Tell the vnet implementation to add a vnet.
  21.239 +    """
  21.240 +    expr = ['vnet.add', ['id', vnetid]]
  21.241 +    if vnetif:
  21.242 +        expr.append(['vnetif', vnetif])
  21.243 +    if security:
  21.244 +        expr.append(['security', security])
  21.245 +    return vnet_cmd(expr)
  21.246 +
  21.247 +def peer_add(addr, port=None):
  21.248 +    expr = ['peer.add', ['addr', addr]]
  21.249 +    if port:
  21.250 +        expr.append(['port', port])
  21.251 +    return vnet_cmd(expr)
  21.252 +    
  21.253 +def peer_del(addr, port=None):
  21.254 +    expr = ['peer.del', ['addr', addr]]
  21.255 +    return vnet_cmd(expr)
  21.256 +
  21.257 +def vnet_del(vnetid):
  21.258 +    """Tell the vnet implementation to delete a vnet.
  21.259 +    """
  21.260 +    expr = ['vnet.del', ['id', vnetid]]
  21.261 +    return vnet_cmd(expr)
  21.262 +
  21.263 +def vnet_create(vnetid, vnetif=None, bridge=None, security=None):
  21.264 +    """Tell the vnet implementation to add a vnet.
  21.265 +    If 'bridge' is non-null, create the bridge and add the vnet interface
  21.266 +    to it.
  21.267 +    """
  21.268 +    vnet_add(vnetid, vnetif=vnetif, security=security)
  21.269 +    val = vnet_lookup(vnetid)
  21.270 +    if not vnetif:
  21.271 +        vnetif = sxp.child_value(val, "vnetif")
  21.272 +    vmac = get_mac(vnetif)
  21.273 +    emac = get_mac("eth0") or get_mac("eth1") or get_mac("eth2")
  21.274 +    if emac and vmac != emac:
  21.275 +        set_mac(vnetif, emac)
  21.276 +    cmd(CMD_IFCONFIG, vnetif, 'up')
  21.277 +    if bridge:
  21.278 +        bridge_create(bridge)
  21.279 +        vif_bridge_add(bridge, vnetif)
  21.280 +    return val
  21.281 +        
  21.282 +def vnet_delete(vnet, delbridge=False):
  21.283 +    """Tell the vnet implementation to delete a vnet.
  21.284 +    If the vnet interface is attached to a bridge,
  21.285 +    remove it from the bridge, and if delbridge is true
  21.286 +    delete the bridge.
  21.287 +    """
  21.288 +    v = vnet_lookup(vnet)    
  21.289 +    if not v:
  21.290 +        raise GetoptError("vnet not found: %s" % vnet)
  21.291 +    vnetid = sxp.child_value(v, "id")
  21.292 +    vnetif = sxp.child_value(v, "vnetif")
  21.293 +    bridge = Bridge.getBridge(vnetif)
  21.294 +    if bridge:
  21.295 +        vif_bridge_rem(bridge, vnetif)
  21.296 +        if delbridge:
  21.297 +            bridge_del(bridge)
  21.298 +    return vnet_del(vnetid)
  21.299 +
  21.300 +def get_mac(intf):
  21.301 +    """Get the mac address of an interface.
  21.302 +    """
  21.303 +    try:
  21.304 +        return Bridge.getInterfaceAddr(intf)
  21.305 +    except:
  21.306 +        pass
  21.307 +
  21.308 +    hwre = re.compile(".*\s+HWaddr\s+(?P<mac>\S*)\s+.*")
  21.309 +    fin = os.popen("%s %s" % (CMD_IFCONFIG, intf), 'r')
  21.310 +    try:
  21.311 +        for x in fin:
  21.312 +            m = hwre.match(x)
  21.313 +            if not m:
  21.314 +                continue
  21.315 +            info = m.groupdict()
  21.316 +            return info['mac']
  21.317 +        return None
  21.318 +    finally:
  21.319 +        fin.close()
  21.320 +
  21.321 +def set_mac(intf, mac):
  21.322 +    cmd(CMD_IFCONFIG, intf, 'down')
  21.323 +    cmd(CMD_IFCONFIG, intf, 'hw', 'ether', mac)
  21.324 +    cmd(CMD_IFCONFIG, intf, 'up')
  21.325 +
  21.326 +def get_addr(host):
  21.327 +    return socket.gethostbyname(host)
  21.328 +
  21.329 +def get_port(srv):
  21.330 +    return srv
  21.331 +
  21.332 +def vnetidof(v):
  21.333 +    """Normalise a vnet id. Adds leading 0 fields to make up 8 if
  21.334 +    there aren't enough. Pads all fields to 4 hex digits.
  21.335 +    """
  21.336 +    try:
  21.337 +        l = v.split(":")
  21.338 +        l = [ int(x or 0, 16) for x in l ]
  21.339 +        l = [ 0 ] * (8 - len(l)) + l
  21.340 +        return ":".join([ "%04x" % x for x in l ])
  21.341 +    except:
  21.342 +        return None
  21.343 +
  21.344 +def vnet_lookup(vnet, vnets=None):
  21.345 +    """Find the vnet with the given vnet id or vnet interface.
  21.346 +
  21.347 +    @param vnet id or interface
  21.348 +    @param vnets list of vnet info to use (get from implementation if None)
  21.349 +    @return vnet info or None if not found
  21.350 +    """
  21.351 +    vnetid = vnetidof(vnet)
  21.352 +    if vnets is None:
  21.353 +        vnets = vnet_list()
  21.354 +    for v in vnets:
  21.355 +        vid = sxp.child_value(v, "id")
  21.356 +        if vid == vnet or vid == vnetid:
  21.357 +            return v
  21.358 +        if sxp.child_value(v, "vnetif") == vnet:
  21.359 +            return v
  21.360 +    return None
  21.361 +
  21.362 +def get_vnetid(vnet):
  21.363 +    """Get the normalised vnet id of the given vnet id or vnet interface.
  21.364 +    Raises an error if the vnet cannot be found.
  21.365 +    """
  21.366 +    v = vnet_lookup(vnet)
  21.367 +    if not v:
  21.368 +        raise GetoptError("vnet not found: %s" % vnet)
  21.369 +    vnetid = sxp.child_value(v, "id")
  21.370 +    return vnetid
  21.371 +
  21.372 +def vif_list():
  21.373 +    """Get the list of vif info from the vnet implementation.
  21.374 +    """
  21.375 +    if vnetd_running():
  21.376 +        (fi, fo) = vnetd_open()
  21.377 +        sxp.show(['vif.list'], fo)
  21.378 +        fo.flush()
  21.379 +    else:
  21.380 +        fi = file("/proc/vnet/vifs")
  21.381 +        fo = None
  21.382 +    try:
  21.383 +        return sxp.parse(fi) or []
  21.384 +    finally:
  21.385 +        if fi: fi.close()
  21.386 +        if fo: fo.close()
  21.387 +
  21.388 +def vnets_filter(vnetlist, vnets):
  21.389 +    """Filter a list of vnet info by a list of vnet ids or interfaces.
  21.390 +    """
  21.391 +    if vnets is None:
  21.392 +        val = vnetlist
  21.393 +    else:
  21.394 +        val = []
  21.395 +        for x in vnets:
  21.396 +            v = vnet_lookup(x, vnets=vnetlist)
  21.397 +            if not v:
  21.398 +                continue
  21.399 +            val.append(v)
  21.400 +    return val
  21.401 +
  21.402 +def vnet_list(vnets=None):
  21.403 +    """Get the list of vnet info from the vnet implementation,
  21.404 +    sorted by vnet id.
  21.405 +
  21.406 +    @param vnets list of vnet ids or interfaces to filter the results by
  21.407 +    """
  21.408 +    if vnetd_running():
  21.409 +        (fi, fo) = vnetd_open()
  21.410 +        sxp.show(['vnet.list'], fo)
  21.411 +        fo.flush()
  21.412 +    else:
  21.413 +        fi = file("/proc/vnet/vnets")
  21.414 +        fo = None
  21.415 +    try:
  21.416 +        val = vnets_filter(sxp.parse(fi) or [], vnets)
  21.417 +        val.sort(lambda x, y:
  21.418 +                   cmp(sxp.child_value(x, "id"),
  21.419 +                       sxp.child_value(y, "id")))
  21.420 +        return val
  21.421 +    finally:
  21.422 +        if fi: fi.close()
  21.423 +        if fo: fo.close()
  21.424 +        
  21.425 +def vnif_list(vnets=None):
  21.426 +    """Get the list of vnet interface names from the vnet implementation.
  21.427 +
  21.428 +    @param vnets list of vnet ids or interfaces to filter the results by
  21.429 +    """
  21.430 +    vnifs = []
  21.431 +    for v in vnet_list(vnets=vnets):
  21.432 +        vnetif = sxp.child_value(v, "vnetif")
  21.433 +        if vnetif:
  21.434 +            vnifs.append(vnetif)
  21.435 +    return vnifs
  21.436 +        
  21.437 +def varp_list():
  21.438 +    """Get the list of varp info from the vnet implementation.
  21.439 +    """
  21.440 +    if vnetd_running():
  21.441 +        (fi, fo) = vnetd_open()
  21.442 +        sxp.show(['varp.list'], fo)
  21.443 +        fo.flush()
  21.444 +    else:
  21.445 +        fi = file("/proc/vnet/varp")
  21.446 +        fo = None
  21.447 +    try:
  21.448 +        return sxp.parse(fi) or []
  21.449 +    finally:
  21.450 +        if fi: fi.close()
  21.451 +        if fo: fo.close()
  21.452 +
  21.453 +def peer_list():
  21.454 +    if vnetd_running():
  21.455 +        (fi, fo) = vnetd_open()
  21.456 +        sxp.show(['peer.list'], fo)
  21.457 +        fo.flush()
  21.458 +    else:
  21.459 +        fi = file("/proc/vnet/peers")
  21.460 +        fo = None
  21.461 +    try:
  21.462 +        return sxp.parse(fi) or []
  21.463 +    finally:
  21.464 +        if fi: fi.close()
  21.465 +        if fo: fo.close()
  21.466 +
  21.467 +class Opt:
  21.468 +    """Declares command-line options for a command.
  21.469 +    """
  21.470 +
  21.471 +    def getopt(klass, argv, opts, args):
  21.472 +        """Get options and args from argv.
  21.473 +        The value opts in the return value has an attribute for
  21.474 +        eacho option or arg. The value args in the return value
  21.475 +        is the remaining arguments.
  21.476 +
  21.477 +        @param argv arguments
  21.478 +        @param opts option specifiers (list of Opt objects)
  21.479 +        @param args arg specififiers (list of Arg objects)
  21.480 +        @return (opts, args)
  21.481 +        """
  21.482 +        shortopts = "".join([ x.optShort() for x in opts ])
  21.483 +        longopts  = [ x.optLong() for x in opts ]
  21.484 +        (ovals, oargs) = getopt(argv[1:], shortopts, longopts)
  21.485 +        odir = Opts()
  21.486 +        for x in opts:
  21.487 +            x.setDefault(odir)
  21.488 +        for (k, v) in ovals:
  21.489 +            for x in opts:
  21.490 +                x.setOpt(k, v, odir)
  21.491 +        argc = len(oargs)
  21.492 +        if len(oargs) < len(args):
  21.493 +            raise GetoptError("insufficient arguments for %s" % argv[0])
  21.494 +        for (x, v) in zip(args, oargs):
  21.495 +            x.setArg(v, odir)
  21.496 +        return (odir, oargs[len(args): ])
  21.497 +
  21.498 +    getopt = classmethod(getopt)
  21.499 +
  21.500 +    def gethelp(klass, opts, args):
  21.501 +        l = []
  21.502 +        for x in opts:
  21.503 +            l.append(x.help())
  21.504 +        for x in args:
  21.505 +            l.append(x.help())
  21.506 +        return " ".join(l)
  21.507 +
  21.508 +    gethelp = classmethod(gethelp)
  21.509 +
  21.510 +    """A command=-line option.
  21.511 +
  21.512 +    @param name option name (this attribute is set to value in opts)
  21.513 +    @param short short option flag (single-character string)
  21.514 +    @param long long option name (defaults to option name, pass "" to suppress)
  21.515 +    @param arg argument name (option has no arg if not specified)
  21.516 +    """
  21.517 +    def __init__(self, name, short=None, long=None, arg=False):
  21.518 +        self.name = name
  21.519 +        self.short = short
  21.520 +        if long is None:
  21.521 +            long = name
  21.522 +        elif not long:
  21.523 +            long = None
  21.524 +        self.long = long
  21.525 +        self.arg = arg
  21.526 +
  21.527 +    def help(self):
  21.528 +        s = self.keyShort()
  21.529 +        l = self.keyLong()
  21.530 +        if s and l:
  21.531 +            return "[%s | %s]" % (s, l)
  21.532 +        else:
  21.533 +            return s or l
  21.534 +
  21.535 +    def keyShort(self):
  21.536 +        if self.short:
  21.537 +            return "-%s" % self.short
  21.538 +        else:
  21.539 +            return None
  21.540 +
  21.541 +    def keyLong(self):
  21.542 +        if self.long:
  21.543 +            return "--%s" % self.long
  21.544 +        else:
  21.545 +            return None
  21.546 +
  21.547 +    def optLong(self):
  21.548 +        if not self.long:
  21.549 +            return None
  21.550 +        if self.arg:
  21.551 +            return "%s=" % self.long
  21.552 +        else:
  21.553 +            return self.long
  21.554 +
  21.555 +    def optShort(self):
  21.556 +        if not self.short:
  21.557 +            return None
  21.558 +        if self.arg:
  21.559 +            return "%s:" % self.short
  21.560 +        else:
  21.561 +            return self.short
  21.562 +
  21.563 +    def setDefault(self, vals):
  21.564 +        if self.arg:
  21.565 +            setattr(vals, self.name, None)
  21.566 +        else:
  21.567 +            setattr(vals, self.name, False)
  21.568 +
  21.569 +    def setOpt(self, k, v, vals):
  21.570 +        if k in [ self.keyShort(), self.keyLong() ]:
  21.571 +            if self.arg:
  21.572 +                setattr(vals, self.name, v)
  21.573 +            else:
  21.574 +                if v not in [ None, '' ]:
  21.575 +                    raise GetoptError("option %s does not take an argument" % k)
  21.576 +                setattr(vals, self.name, True)
  21.577 +
  21.578 +class Arg:
  21.579 +
  21.580 +    """A command-line parameter. Args get their values from arguments
  21.581 +    left over after option processing and are assigned in order.
  21.582 +    The value is accessible as the attribute called 'name' in opts.
  21.583 +
  21.584 +    @param name argument name
  21.585 +    """
  21.586 +    def __init__(self, name):
  21.587 +        self.name = name
  21.588 +
  21.589 +    def setArg(self, v, vals):
  21.590 +        setattr(vals, self.name, v)
  21.591 +
  21.592 +    def help(self):
  21.593 +        return "<%s>" % self.name
  21.594 +            
  21.595 +class VnMain:
  21.596 +
  21.597 +    """Methods beginning with this prefix are commands.
  21.598 +    They must all have arguments like this:
  21.599 +
  21.600 +    op_foo(self, argv, args, opts)
  21.601 +
  21.602 +    argv: original command-line arguments
  21.603 +    args: arguments left after option processing
  21.604 +    opts: option and arg values (accessible as attributes)
  21.605 +
  21.606 +    Method options are specified by setting attribute
  21.607 +    .opts on the method to a list of Option objects.
  21.608 +    For args set .args to a list of Arg objects.
  21.609 +    Use .use for short usage string, .help for long help.
  21.610 +
  21.611 +    Each option or arg defines an attribute in opts. For example
  21.612 +    an option with name 'foo' is accessible as 'opts.foo'.
  21.613 +    """
  21.614 +    opPrefix = "op_"
  21.615 +
  21.616 +    def __init__(self, argv):
  21.617 +        if argv:
  21.618 +            self.name = argv[0]
  21.619 +        else:
  21.620 +            self.name = "vn"
  21.621 +        self.argv = argv
  21.622 +        self.argc = len(argv)
  21.623 +
  21.624 +    def error(self, v):
  21.625 +        print >>sys.stderr, "%s: %s" % (self.name, v)
  21.626 +        sys.exit(1)
  21.627 +        
  21.628 +    def getFunction(self, opname):
  21.629 +        key = self.opPrefix + opname.replace("-", "_")
  21.630 +        fn = getattr(self, key, None)
  21.631 +        if not fn:
  21.632 +            raise ValueError("unknown command: %s" % opname)
  21.633 +        return fn
  21.634 +    
  21.635 +    def main(self):
  21.636 +        if self.argc < 2:
  21.637 +            args = ["help"]
  21.638 +        else:
  21.639 +            args = self.argv[1:]
  21.640 +        try:
  21.641 +            fn = self.getFunction(args[0])
  21.642 +        except ValueError, ex:
  21.643 +            self.error(ex)
  21.644 +        try:
  21.645 +            fnopts = self.getOpts(fn)
  21.646 +            fnargs = self.getArgs(fn)
  21.647 +            (opts, parms) = Opt.getopt(args, fnopts, fnargs)
  21.648 +            return fn(args, parms, opts)
  21.649 +        except GetoptError, ex:
  21.650 +            self.error(ex)
  21.651 +        except ValueError, ex:
  21.652 +            self.error(ex)
  21.653 +        except Exception, ex:
  21.654 +            import traceback; traceback.print_exc()
  21.655 +            self.error(ex)
  21.656 +
  21.657 +    def getOpts(self, meth):
  21.658 +        return getattr(meth, "opts", [])
  21.659 +    
  21.660 +    def getArgs(self, meth):
  21.661 +        return getattr(meth, "args", [])
  21.662 +    
  21.663 +    def getUse(self, meth):
  21.664 +        return getattr(meth, "use", "")
  21.665 +    
  21.666 +    def getHelp(self, meth):
  21.667 +        return getattr(meth, "help", "") or self.getUse(meth)
  21.668 +
  21.669 +    def fnHelp(self, meth):
  21.670 +        return Opt.gethelp(self.getOpts(meth), self.getArgs(meth))
  21.671 +
  21.672 +    def printHelp(self, fn, opt_long):
  21.673 +        meth = getattr(self, fn)
  21.674 +        opname = fn[len(self.opPrefix):].replace("_", "-")
  21.675 +        if opt_long:
  21.676 +            help = self.getHelp(meth)
  21.677 +            print "\n  %s" % opname
  21.678 +            if help:
  21.679 +                print "%s" % help
  21.680 +        else:
  21.681 +            use = self.getUse(meth)
  21.682 +            print "  %s %s" % (opname, self.fnHelp(meth))
  21.683 +            if use:
  21.684 +                print "\t\t%s" % use
  21.685 +
  21.686 +    def show_vnif(self, dev):
  21.687 +        cmd(CMD_IFCONFIG, dev)
  21.688 +        bridge = Bridge.getBridge(dev)
  21.689 +        if bridge:
  21.690 +            print "          Bridge:", bridge
  21.691 +            interfaces = Bridge.getBridgeInterfaces(bridge)
  21.692 +            if dev in interfaces:
  21.693 +                interfaces.remove(dev)
  21.694 +            if interfaces:
  21.695 +                print "          Interfaces:", ", ".join(interfaces)
  21.696 +            print
  21.697 +
  21.698 +    def op_help(self, argv, args, opts):
  21.699 +        if opts.long:
  21.700 +            print '%s <command> <options>' % self.name
  21.701 +            print self.long_help
  21.702 +        else:
  21.703 +            print '%s:' % self.name
  21.704 +        l = dir(self)
  21.705 +        l.sort()
  21.706 +        for fn in l:
  21.707 +            if fn.startswith(self.opPrefix):
  21.708 +                self.printHelp(fn, opts.long)
  21.709 +        print
  21.710 +
  21.711 +    op_help.opts = [ Opt('long', short='l') ]
  21.712 +
  21.713 +    def op_vnets(self, argv, args, opts):
  21.714 +        vnets = vnet_list(vnets=args or None)
  21.715 +        for v in vnets:
  21.716 +            prettyprint(v, width=50)
  21.717 +            print
  21.718 +            if not opts.long:
  21.719 +                continue
  21.720 +            vnif = sxp.child_value(v, "vnetif")
  21.721 +            if not vnif:
  21.722 +                continue
  21.723 +            self.show_vnif(vnif)
  21.724 +        if opts.all:
  21.725 +            vnetids = {}
  21.726 +            for v in vnets:
  21.727 +                vnetids[sxp.child_value(v, "id")] = v
  21.728 +            for v in vif_list():
  21.729 +                vnet = sxp.child_value(v, "vnet")
  21.730 +                if vnet not in vnetids:
  21.731 +                    continue
  21.732 +                prettyprint(v)
  21.733 +                print
  21.734 +            for v in varp_list():
  21.735 +                prettyprint(v)
  21.736 +                print
  21.737 +
  21.738 +    op_vnets.opts = [ Opt('all', short='a'), Opt('long', short='l') ]
  21.739 +
  21.740 +    def op_vnifs(self, argv, args, opts):
  21.741 +        vnifs = vnif_list(vnets=args or None)
  21.742 +        for vnif in vnifs:
  21.743 +            self.show_vnif(vnif)
  21.744 +
  21.745 +    def op_vifs(self, argv, args, opts):
  21.746 +        for v in vif_list():
  21.747 +            prettyprint(v)
  21.748 +            print
  21.749 +
  21.750 +    def op_varp(self, argv, args, opts):
  21.751 +        for v in varp_list():
  21.752 +            prettyprint(v)
  21.753 +            print
  21.754 +
  21.755 +    def op_varp_flush(self, argv, args, opts):
  21.756 +        varp_flush()
  21.757 +
  21.758 +    def op_vnet_create(self, argv, args, opts):
  21.759 +        return vnet_create(opts.vnet,
  21.760 +                           vnetif=opts.vnetif,
  21.761 +                           bridge=opts.bridge,
  21.762 +                           security=opts.security)
  21.763 +
  21.764 +    op_vnet_create.args = [ Arg('vnet') ]
  21.765 +    op_vnet_create.opts = [ Opt('security', short='s', arg="SECURITY"),
  21.766 +                            Opt('bridge', short='b', arg="BRIDGE"),
  21.767 +                            Opt('vnetif', short='v', arg="VNETIF") ]
  21.768 +
  21.769 +    def op_vnet_delete(self, argv, args, opts):
  21.770 +        vnetid = get_vnetid(opts.vnet)
  21.771 +        return vnet_delete(vnetid, delbridge=opts.bridge)
  21.772 +
  21.773 +    op_vnet_delete.args = [ Arg('vnet') ]
  21.774 +    op_vnet_delete.opts = [ Opt('bridge', short='b') ]
  21.775 +
  21.776 +    def op_vif_add(self, argv, args, opts):
  21.777 +        vnetid = get_vnetid(opts.vnet)
  21.778 +        if opts.interface:
  21.779 +            vmac = get_mac(opts.vmac)
  21.780 +            if not vmac:
  21.781 +                raise ValueError("interface not found: %s" % opts.vmac)
  21.782 +        else:
  21.783 +            vmac = opts.vmac
  21.784 +        return vif_add(vnetid, vmac)
  21.785 +
  21.786 +    op_vif_add.args = [ Arg('vnet'), Arg('vmac') ]
  21.787 +    op_vif_add.opts = [ Opt('interface', short='i') ]
  21.788 +
  21.789 +    def op_vif_delete(self, argv, args, opts):
  21.790 +        vnetid = get_vnetid(opts.vnet)
  21.791 +        if opts.interface:
  21.792 +            vmac = get_mac(opts.vmac)
  21.793 +        else:
  21.794 +            vmac = opts.vmac
  21.795 +        return vif_del(vnetid, vmac)
  21.796 +
  21.797 +    op_vif_delete.args = [ Arg('vnet'), Arg('vmac') ]
  21.798 +    op_vif_delete.opts = [ Opt('interface', short='i') ]
  21.799 +
  21.800 +    def op_peer_add(self, argv, args, opts):
  21.801 +        addr = get_addr(opts.addr)
  21.802 +        if(opts.port):
  21.803 +            port = get_port(opts.port)
  21.804 +        else:
  21.805 +            port = None
  21.806 +        return peer_add(addr, port)
  21.807 +        
  21.808 +    op_peer_add.args = [ Arg('addr') ]
  21.809 +    op_peer_add.opts = [ Opt('port', short='p') ]
  21.810 +    
  21.811 +    def op_peer_delete(self, argv, args, opts):
  21.812 +        addr = get_addr(opts.addr)
  21.813 +        return peer_del(addr)
  21.814 +
  21.815 +    op_peer_delete.args = [ Arg('addr') ]
  21.816 +    
  21.817 +    def op_peers(self, argv, args, opts):
  21.818 +        for v in peer_list():
  21.819 +            prettyprint(v)
  21.820 +            print
  21.821 +
  21.822 +    def op_bridges(self, argv, args, opts):
  21.823 +        if opts.long:
  21.824 +            for bridge in Bridge.getBridges():
  21.825 +                cmd(CMD_IFCONFIG, bridge)
  21.826 +                interfaces = Bridge.getBridgeInterfaces(bridge)
  21.827 +                if interfaces:
  21.828 +                    print "          Interfaces:", ", ".join(interfaces)
  21.829 +                    print
  21.830 +        else:
  21.831 +            for bridge in Bridge.getBridges():
  21.832 +                print bridge,
  21.833 +                interfaces = Bridge.getBridgeInterfaces(bridge)
  21.834 +                if interfaces:
  21.835 +                    print ":", ", ".join(interfaces)
  21.836 +                else:
  21.837 +                    print
  21.838 +            
  21.839 +    op_bridges.opts = [ Opt('long', short='l') ]
  21.840 +
  21.841 +    def op_insmod(self, argv, args, opts):
  21.842 +        """Insert the vnet kernel module."""
  21.843 +        cmd("/etc/xen/scripts/vnet-insert", *args)
  21.844 +
  21.845 +    long_help          = """Control utility for vnets (virtual networking).
  21.846 +Report bugs to Mike Wray <mike.wray@hp.com>.
  21.847 +"""
  21.848 +
  21.849 +    op_help.use        = "Print help."
  21.850 +    op_help.help       = "Print help, long help if the option -l or --long is given."
  21.851 +
  21.852 +    op_vnets.use       = """Print vnets."""
  21.853 +    op_vnets.help      = """Print vnet information, where options are:
  21.854 +    -a, -all           Print vnets, vifs and varp info.
  21.855 +    -l, --long         Print ifconfigs for vnet interfaces."""
  21.856 +
  21.857 +    op_vifs.use        = "Print vifs."
  21.858 +
  21.859 +    op_vnifs.use       = "Print ifconfigs for vnet network interfaces."
  21.860 +
  21.861 +    op_varp.use        = "Print varp info and entries in the varp cache."
  21.862 +
  21.863 +    op_varp_flush.use  = "Flush the varp cache."
  21.864 +    
  21.865 +    op_vnet_create.use = "Create a vnet."
  21.866 +
  21.867 +    op_vnet_delete.use = "Delete a vnet."
  21.868 +    op_vnet_delete.help = """Delete a vnet.
  21.869 +    -b, --bridge       Delete the bridge the vnet interface is attached to.
  21.870 +    """
  21.871 +
  21.872 +    op_vif_add.use     = "Add a vif to a vnet."
  21.873 +    op_vif_add.help    = """Add a vif to a vnet. Not usually needed as vifs
  21.874 +are added automatically.
  21.875 +    -i, --interface    The vmac is the name of an interface to get the mac from."""
  21.876 +
  21.877 +    op_vif_delete.use  = "Delete a vif from a vnet."
  21.878 +    op_vif_delete.help = """Delete a vif from a vnet. Not usually needed as vifs
  21.879 +are removed periodically.
  21.880 +    -i, --interface    The vmac is the name of an interface to get the mac from."""
  21.881 +
  21.882 +    op_peer_add.use    = "Add a peer."
  21.883 +    op_peer_add.help   = """Add a peer: <addr> <port>
  21.884 +Vnets use multicast to discover interfaces, but networks are often configured
  21.885 +not to forward multicast. Vnets forward multicasts to peers using UDP.
  21.886 +Only add peers if multicasts are not working, check with
  21.887 +
  21.888 +ping -b 224.10.0.1
  21.889 +
  21.890 +Only add peers at one machine in a subnet, otherwise you may cause forwarding
  21.891 +loops.
  21.892 +"""
  21.893 +
  21.894 +    op_peer_delete.use = "Delete a peer."
  21.895 +    op_peer_delete.help= "Delete a peer: <addr>"
  21.896 +
  21.897 +    op_peers.use       = "List peers."
  21.898 +    op_peers.help      = "List peers."
  21.899 +
  21.900 +    op_bridges.use     = "Print bridges."
  21.901 +
  21.902 +    op_insmod.use      = "Insert the vnet kernel module, optionally with parameters."
  21.903 +
  21.904 +if __name__ == "__main__":
  21.905 +    vn = VnMain(sys.argv)
  21.906 +    vn.main()
  21.907 +    
    22.1 --- a/tools/vnet/vnet-module/Makefile-2.6	Thu Feb 09 16:09:00 2006 +0100
    22.2 +++ b/tools/vnet/vnet-module/Makefile-2.6	Thu Feb 09 16:12:11 2006 +0100
    22.3 @@ -30,12 +30,16 @@ KERNEL_MODULE = vnet_module.ko
    22.4  #export KBUILD_VERBOSE=1
    22.5  
    22.6  .PHONY: all
    22.7 -all: module
    22.8 +all: module module_version
    22.9  
   22.10  .PHONY: module
   22.11  module modules:
   22.12  	$(MAKE) -C $(KERNEL_SRC) M=`pwd` modules
   22.13  
   22.14 +.PHONY: module_version
   22.15 +module_version:
   22.16 +	$(warning Module version $(shell strings $(KERNEL_MODULE) | grep vermagic))
   22.17 +
   22.18  .PHONY: install install-module modules_install
   22.19  install install-module modules_install: module
   22.20  	install -m 0755 -d $(DESTDIR)$(KERNEL_MODULE_DIR)
   22.21 @@ -47,6 +51,7 @@ clean:
   22.22  	-@$(RM) *.a *.o *.ko *~ .*.d .*.cmd *.mod.?
   22.23  	-@$(RM) -r .tmp_versions
   22.24  
   22.25 +.PHONY: TAGS
   22.26  TAGS:
   22.27  	etags *.c *.h
   22.28  
    23.1 --- a/tools/vnet/vnet-module/Makefile.ver	Thu Feb 09 16:09:00 2006 +0100
    23.2 +++ b/tools/vnet/vnet-module/Makefile.ver	Thu Feb 09 16:12:11 2006 +0100
    23.3 @@ -21,8 +21,8 @@
    23.4  LINUX_SERIES ?=2.6
    23.5  KERNEL_MINOR ?=-xen0
    23.6  
    23.7 -LINUX_VERSION ?= $(shell ( /bin/ls -ld $(XEN_ROOT)/linux-$(LINUX_SERIES).*-xen0 ) 2>/dev/null | \
    23.8 -                      sed -e 's!^.*linux-\(.\+\)-xen0!\1!' )
    23.9 +LINUX_VERSION ?= $(shell (/bin/ls -ld $(XEN_ROOT)/pristine-linux-$(LINUX_SERIES).* 2>/dev/null) | \
   23.10 +                      sed -e 's!^.*linux-\(.\+\)!\1!' )
   23.11  
   23.12  ifeq ($(LINUX_VERSION),)
   23.13  $(error Kernel source for linux $(LINUX_SERIES) not found)
   23.14 @@ -32,7 +32,13 @@ KERNEL_VERSION =$(LINUX_VERSION)$(KERNEL
   23.15  
   23.16  KERNEL_SRC ?= $(XEN_ROOT)/linux-$(KERNEL_VERSION)
   23.17  
   23.18 -KERNEL_MODULE_DIR = /lib/modules/$(KERNEL_VERSION)/kernel
   23.19 +# Get the full kernel release version from its makefile, as the source path
   23.20 +# may not have the extraversion, e.g. linux-2.6.12-xen0 may contain release 2.6.12.6-xen0.
   23.21 +KERNEL_RELEASE = $(shell make -s -C $(KERNEL_SRC) kernelrelease || \
   23.22 +	make -f $(shell pwd)/Makefile.kver -s -C $(KERNEL_SRC) kernelrelease )
   23.23  
   23.24 +KERNEL_MODULE_DIR = /lib/modules/$(KERNEL_RELEASE)/kernel
   23.25 +
   23.26 +$(warning KERNEL_SRC     $(KERNEL_SRC))
   23.27  #$(warning KERNEL_VERSION $(KERNEL_VERSION))
   23.28 -#$(warning KERNEL_SRC $(KERNEL_SRC))
   23.29 +$(warning KERNEL_RELEASE $(KERNEL_RELEASE))
    24.1 --- a/tools/vnet/vnet-module/Makefile.vnet	Thu Feb 09 16:09:00 2006 +0100
    24.2 +++ b/tools/vnet/vnet-module/Makefile.vnet	Thu Feb 09 16:12:11 2006 +0100
    24.3 @@ -32,6 +32,8 @@ VNET_SRC += sa_algorithm.c
    24.4  VNET_SRC += sa.c
    24.5  VNET_SRC += skb_context.c
    24.6  VNET_SRC += skb_util.c
    24.7 +VNET_SRC += sxpr_util.c
    24.8 +VNET_SRC += timer_util.c
    24.9  VNET_SRC += tunnel.c
   24.10  VNET_SRC += varp.c
   24.11  VNET_SRC += varp_socket.c
   24.12 @@ -39,12 +41,15 @@ VNET_SRC += vif.c
   24.13  VNET_SRC += vnet.c
   24.14  VNET_SRC += vnet_dev.c
   24.15  VNET_SRC += vnet_ioctl.c
   24.16 +VNET_SRC += vnet_eval.c
   24.17 +VNET_SRC += vnet_forward.c
   24.18  
   24.19  VNET_LIB_SRC += allocate.c
   24.20  VNET_LIB_SRC += enum.c
   24.21  VNET_LIB_SRC += hash_table.c
   24.22  VNET_LIB_SRC += iostream.c
   24.23  VNET_LIB_SRC += kernel_stream.c
   24.24 +VNET_LIB_SRC += mem_stream.c
   24.25  VNET_LIB_SRC += sxpr.c
   24.26  VNET_LIB_SRC += sxpr_parser.c
   24.27  VNET_LIB_SRC += sys_net.c
    25.1 --- a/tools/vnet/vnet-module/esp.c	Thu Feb 09 16:09:00 2006 +0100
    25.2 +++ b/tools/vnet/vnet-module/esp.c	Thu Feb 09 16:12:11 2006 +0100
    25.3 @@ -1,5 +1,5 @@
    25.4  /*
    25.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    25.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    25.7   *
    25.8   * This program is free software; you can redistribute it and/or modify
    25.9   * it under the terms of the GNU General Public License as published by the 
   25.10 @@ -51,6 +51,7 @@
   25.11  #include <tunnel.h>
   25.12  #include <vnet.h>
   25.13  #include <skb_util.h>
   25.14 +#include <skb_context.h>
   25.15  
   25.16  static const int DEBUG_ICV = 0;
   25.17  
   25.18 @@ -59,6 +60,18 @@ static const int DEBUG_ICV = 0;
   25.19  #undef DEBUG
   25.20  #include "debug.h"
   25.21  
   25.22 +#ifndef CONFIG_CRYPTO_HMAC
   25.23 +#warning No esp transform - CONFIG_CRYPTO_HMAC not defined
   25.24 +
   25.25 +int __init esp_module_init(void){
   25.26 +    return 0;
   25.27 +}
   25.28 +
   25.29 +void __exit esp_module_exit(void){
   25.30 +}
   25.31 +
   25.32 +#else
   25.33 +
   25.34  /* Outgoing packet:                            [ eth | ip | data ]
   25.35   * After etherip:        [ eth2 | ip2 |  ethip | eth | ip | data ]
   25.36   * After esp   :   [ eth2 | ip2 | esp | {ethip | eth | ip | data} | pad | icv ]
   25.37 @@ -221,7 +234,7 @@ static int esp_sa_digest(ESPState *esp, 
   25.38      
   25.39      if(DEBUG_ICV){
   25.40          dprintf("> skb digest_n=%d icv_n=%d\n", digest_n, icv_n);
   25.41 -        skb_print_bits(skb, 0, digest_n);
   25.42 +        skb_print_bits("esp", skb, 0, digest_n);
   25.43      }
   25.44      memset(icv, 0, icv_n);
   25.45      esp->digest.icv(esp, skb, 0, digest_n, icv);
   25.46 @@ -248,7 +261,7 @@ static int esp_check_icv(SAState *sa, ES
   25.47      if(DEBUG_ICV){
   25.48          dprintf("> skb len=%d digest_n=%d icv_n=%d\n",
   25.49                  skb->len, digest_n, icv_n);
   25.50 -        skb_print_bits(skb, 0, skb->len);
   25.51 +        skb_print_bits("esp", skb, 0, skb->len);
   25.52      }
   25.53      if(skb_copy_bits(skb, digest_n, icv_skb, icv_n)){
   25.54          wprintf("> Error getting icv from skb\n");
   25.55 @@ -309,7 +322,7 @@ static int esp_sa_send(SAState *sa, stru
   25.56      dprintf("> len=%d plaintext=%d ciphertext=%d extra=%d\n",
   25.57              skb->len, plaintext_n, ciphertext_n, extra_n);
   25.58      dprintf("> iv=%d icv=%d\n", iv_n, icv_n);
   25.59 -    skb_print_bits(skb, 0, skb->len);
   25.60 +    skb_print_bits("iv", skb, 0, skb->len);
   25.61  
   25.62      // Add headroom for esp header and iv, tailroom for the ciphertext
   25.63      // and icv.
   25.64 @@ -393,9 +406,12 @@ static void esp_context_free_fn(SkbConte
   25.65   * Does ESP receive processing (check icv, decrypt), strips
   25.66   * ESP header and re-receives.
   25.67   *
   25.68 + * If return 1 the packet has been freed.
   25.69 + * If return <= 0 the caller must free.
   25.70 + *
   25.71   * @param sa SA
   25.72   * @param skb packet
   25.73 - * @return 0 on success, negative error code otherwise
   25.74 + * @return >= 0 on success, negative protocol otherwise
   25.75   */
   25.76  static int esp_sa_recv(SAState *sa, struct sk_buff *skb){
   25.77      int err = -EINVAL;
   25.78 @@ -458,10 +474,19 @@ static int esp_sa_recv(SAState *sa, stru
   25.79                             sa, esp_context_free_fn);
   25.80      if(err) goto exit;
   25.81      // Increase sa refcount now the skb context refers to it.
   25.82 +    // Refcount is decreased by esp_context_free_fn.
   25.83      SAState_incref(sa);
   25.84 -    err = netif_rx(skb);
   25.85 +    // Deliver skb to be received by network code.
   25.86 +    // Not safe to refer to the skb after this.
   25.87 +    // todo: return -skb->nh.iph->protocol instead?
   25.88 +    netif_rx(skb);
   25.89    exit:
   25.90 -    if(mine) err = 1;
   25.91 +    if(mine){
   25.92 +        if(err < 0){
   25.93 +            kfree_skb(skb);
   25.94 +        }
   25.95 +        err = 1;
   25.96 +    }
   25.97      dprintf("< skb=%p err=%d\n", skb, err);
   25.98      return err;
   25.99  }
  25.100 @@ -717,9 +742,15 @@ static int esp_skb_header(struct sk_buff
  25.101   * Lookup spi, if state found hand to the state.
  25.102   * If no state, check spi, if ok, create state and pass to it.
  25.103   * If spi not ok, drop.
  25.104 + *
  25.105 + * Return value convention for protocols:
  25.106 + * >= 0 Protocol took the packet
  25.107 + * < 0  A -ve protocol id the packet should be re-received as.
  25.108 + *
  25.109 + * So always return >=0 if we took the packet, even if we dropped it.
  25.110   * 
  25.111   * @param skb packet
  25.112 - * @return 0 on sucess, negative error code otherwise
  25.113 + * @return 0 on success, negative protocol number otherwise
  25.114   */
  25.115  static int esp_protocol_recv(struct sk_buff *skb){
  25.116      int err = 0;
  25.117 @@ -730,7 +761,10 @@ static int esp_protocol_recv(struct sk_b
  25.118      u32 addr;
  25.119      
  25.120      dprintf(">\n");
  25.121 -    dprintf("> recv skb=\n"); skb_print_bits(skb, 0, skb->len);
  25.122 +#ifdef DEBUG
  25.123 +    dprintf("> recv skb=\n"); 
  25.124 +    skb_print_bits(skb, 0, skb->len);
  25.125 +#endif
  25.126      ip_n = (skb->nh.iph->ihl << 2);
  25.127      if(skb->data == skb->mac.raw){
  25.128          // skb->data points at ethernet header.
  25.129 @@ -751,9 +785,14 @@ static int esp_protocol_recv(struct sk_b
  25.130          err = vnet_sa_create(esph->spi, IPPROTO_ESP, addr, &sa);
  25.131          if(err) goto exit;
  25.132      }
  25.133 +    //todo: Return a -ve protocol instead? See esp_sa_recv.
  25.134      err = SAState_recv(sa, skb);
  25.135    exit:
  25.136      if(sa) SAState_decref(sa);
  25.137 +    if(err <= 0){
  25.138 +        kfree_skb(skb);
  25.139 +        err = 0;
  25.140 +    }
  25.141      dprintf("< err=%d\n", err);
  25.142      return err;
  25.143  }
  25.144 @@ -861,3 +900,4 @@ void __exit esp_module_exit(void){
  25.145      }
  25.146  }
  25.147  
  25.148 +#endif // CONFIG_CRYPTO_HMAC
    26.1 --- a/tools/vnet/vnet-module/esp.h	Thu Feb 09 16:09:00 2006 +0100
    26.2 +++ b/tools/vnet/vnet-module/esp.h	Thu Feb 09 16:12:11 2006 +0100
    26.3 @@ -1,5 +1,5 @@
    26.4  /*
    26.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    26.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    26.7   *
    26.8   * This program is free software; you can redistribute it and/or modify
    26.9   * it under the terms of the GNU General Public License as published by the 
   26.10 @@ -19,10 +19,19 @@
   26.11  #ifndef __VNET_ESP_H__
   26.12  #define __VNET_ESP_H__
   26.13  
   26.14 +#ifdef __KERNEL__
   26.15  #include <linux/config.h>
   26.16  #include <linux/types.h>
   26.17  #include <linux/crypto.h>
   26.18  
   26.19 +#else
   26.20 +
   26.21 +#include "sys_kernel.h"
   26.22 +
   26.23 +struct crypto_tfm;
   26.24 +
   26.25 +#endif
   26.26 +
   26.27  /** Header used by IPSEC ESP (Encapsulated Security Payload). */
   26.28  typedef struct ESPHdr {
   26.29      /** The spi (security parameters index). */
    27.1 --- a/tools/vnet/vnet-module/etherip.c	Thu Feb 09 16:09:00 2006 +0100
    27.2 +++ b/tools/vnet/vnet-module/etherip.c	Thu Feb 09 16:12:11 2006 +0100
    27.3 @@ -1,5 +1,5 @@
    27.4  /*
    27.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    27.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    27.7   *
    27.8   * This program is free software; you can redistribute it and/or modify
    27.9   * it under the terms of the GNU General Public License as published by the 
   27.10 @@ -16,6 +16,8 @@
   27.11   * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   27.12   *
   27.13   */
   27.14 +#ifdef __KERNEL__
   27.15 +
   27.16  #include <linux/config.h>
   27.17  #include <linux/module.h>
   27.18  #include <linux/types.h>
   27.19 @@ -29,14 +31,31 @@
   27.20  #include <linux/netdevice.h>
   27.21  #include <linux/in.h>
   27.22  #include <linux/inet.h>
   27.23 +#include <linux/netfilter_bridge.h>
   27.24  #include <linux/netfilter_ipv4.h>
   27.25  #include <linux/icmp.h>
   27.26 +#include <linux/udp.h>
   27.27  
   27.28  #include <net/ip.h>
   27.29  #include <net/protocol.h>
   27.30  #include <net/route.h>
   27.31  #include <net/checksum.h>
   27.32  
   27.33 +#else
   27.34 +
   27.35 +#include <netinet/in.h>
   27.36 +#include <arpa/inet.h>
   27.37 +
   27.38 +#include "sys_kernel.h"
   27.39 +#include "spinlock.h"
   27.40 +#include "skbuff.h"
   27.41 +#include <linux/ip.h>
   27.42 +#include <linux/udp.h>
   27.43 +
   27.44 +#define IP_DF		0x4000		/* Flag: "Don't Fragment"	*/
   27.45 +
   27.46 +#endif
   27.47 +
   27.48  #include <etherip.h>
   27.49  #include <tunnel.h>
   27.50  #include <vnet.h>
   27.51 @@ -44,9 +63,10 @@
   27.52  #include <if_varp.h>
   27.53  #include <varp.h>
   27.54  #include <skb_util.h>
   27.55 +#include <skb_context.h>
   27.56  
   27.57  #define MODULE_NAME "VNET"
   27.58 -//#define DEBUG 1
   27.59 +#define DEBUG 1
   27.60  #undef DEBUG
   27.61  #include "debug.h"
   27.62  
   27.63 @@ -54,6 +74,12 @@
   27.64   * The etherip protocol is used to transport Ethernet frames in IP packets.
   27.65   */
   27.66  
   27.67 +/** Flag controlling whether to use etherip-in-udp encapsulation.
   27.68 + * If false we send etherip protocol in IP packets.
   27.69 + * If true we send etherip protocol in UDP packets with a vnet header.
   27.70 + */
   27.71 +int etherip_in_udp = 1;
   27.72 +
   27.73  /** Get the vnet label from an etherip header.
   27.74   *
   27.75   * @param hdr header
   27.76 @@ -64,7 +90,7 @@ void etheriphdr_get_vnet(struct etheriph
   27.77      *vnet = *(VnetId*)hdr->vnet;
   27.78  #else
   27.79      *vnet = (VnetId){};
   27.80 -    vnet->u.vnet16[7] = (unsigned short)hdr->reserved;
   27.81 +    vnet->u.vnet16[VNET_SIZE16 - 1] = (unsigned short)hdr->reserved;
   27.82      
   27.83  #endif
   27.84  }
   27.85 @@ -81,7 +107,7 @@ void etheriphdr_set_vnet(struct etheriph
   27.86      *(VnetId*)hdr->vnet = *vnet;
   27.87  #else
   27.88      hdr->version = ETHERIP_VERSION;
   27.89 -    hdr->reserved = (vnet->u.vnet16[7] & 0x0fff);
   27.90 +    hdr->reserved = (vnet->u.vnet16[VNET_SIZE16 - 1] & 0x0fff);
   27.91  #endif
   27.92  }
   27.93  
   27.94 @@ -112,55 +138,69 @@ static void etherip_tunnel_close(Tunnel 
   27.95   */
   27.96  static int etherip_tunnel_send(Tunnel *tunnel, struct sk_buff *skb){
   27.97      int err = 0;
   27.98 -    const int etherip_n = sizeof(struct etheriphdr);
   27.99      const int ip_n = sizeof(struct iphdr);
  27.100 -    const int eth_n = ETH_HLEN;
  27.101 -    int head_n = 0;
  27.102 +    const int etherip_n = sizeof(struct etheriphdr);
  27.103 +    const int udp_n = sizeof(struct udphdr);
  27.104 +    const int vnet_n = sizeof(struct VnetMsgHdr);
  27.105 +    int head_n = etherip_n + ip_n /* +  ETH_HLEN */;
  27.106      VnetId *vnet = &tunnel->key.vnet;
  27.107      struct etheriphdr *etheriph;
  27.108 -    struct ethhdr *ethh;
  27.109      u32 saddr = 0;
  27.110  
  27.111 -    //dprintf("> skb=%p vnet=%d\n", skb, vnet);
  27.112 -    head_n = etherip_n + ip_n + eth_n;
  27.113 +    if(etherip_in_udp){
  27.114 +        head_n += vnet_n + udp_n;
  27.115 +    }
  27.116      err = skb_make_room(&skb, skb, head_n, 0);
  27.117      if(err) goto exit;
  27.118  
  27.119 -    //err = vnet_get_device_address(skb->dev, &saddr);
  27.120 -    //if(err) goto exit;
  27.121 -    
  27.122 -    // The original ethernet header.
  27.123 -    ethh = eth_hdr(skb);
  27.124 -    //print_skb_data(__FUNCTION__, 0, skb, skb->mac.raw, skb->len);
  27.125      // Null the pointer as we are pushing a new IP header.
  27.126      skb->mac.raw = NULL;
  27.127  
  27.128      // Setup the etherip header.
  27.129 -    //dprintf("> push etherip header...\n");
  27.130 -    etheriph = (struct etheriphdr *)skb_push(skb, etherip_n);
  27.131 +    etheriph = (void*)skb_push(skb, etherip_n);
  27.132      etheriphdr_set_vnet(etheriph, vnet);
  27.133  
  27.134 +    if(etherip_in_udp){
  27.135 +        // Vnet header.
  27.136 +        struct VnetMsgHdr *vhdr = (void*)skb_push(skb, vnet_n);
  27.137 +        vhdr->id     = htons(VUDP_ID);
  27.138 +        vhdr->opcode = 0;
  27.139 +
  27.140 +        // Setup the UDP header.
  27.141 +        skb->h.raw = skb_push(skb, udp_n);
  27.142 +        skb->h.uh->source = varp_port;		// Source port.
  27.143 +        skb->h.uh->dest   = varp_port;		// Destination port.
  27.144 +        skb->h.uh->len    = htons(skb->len);	// Total packet length (bytes).
  27.145 +        skb->h.uh->check  = 0;
  27.146 +    }
  27.147 +
  27.148      // Setup the IP header.
  27.149 -    //dprintf("> push IP header...\n");
  27.150      skb->nh.raw = skb_push(skb, ip_n); 
  27.151      skb->nh.iph->version  = 4;			// Standard version.
  27.152      skb->nh.iph->ihl      = ip_n / 4;		// IP header length (32-bit words).
  27.153      skb->nh.iph->tos      = 0;			// No special type-of-service.
  27.154      skb->nh.iph->tot_len  = htons(skb->len);    // Total packet length (bytes).
  27.155      skb->nh.iph->id       = 0;			// No flow id (since no frags).
  27.156 -    skb->nh.iph->frag_off = htons(IP_DF);	// Don't fragment - can't handle frags.
  27.157 +    if(etherip_in_udp){
  27.158 +        skb->nh.iph->protocol = IPPROTO_UDP;    // IP protocol number.
  27.159 +        skb->nh.iph->frag_off = 0;
  27.160 +    } else {
  27.161 +        skb->nh.iph->protocol = IPPROTO_ETHERIP;// IP protocol number.
  27.162 +        skb->nh.iph->frag_off = htons(IP_DF);	// Don't fragment - can't handle frags.
  27.163 +    }
  27.164      skb->nh.iph->ttl      = 64;			// Linux default time-to-live.
  27.165 -    skb->nh.iph->protocol = IPPROTO_ETHERIP;    // IP protocol number.
  27.166      skb->nh.iph->saddr    = saddr;		// Source address.
  27.167 -    skb->nh.iph->daddr    = tunnel->key.addr.u.ip4.s_addr;	// Destination address.
  27.168 -    skb->nh.iph->check    = 0;
  27.169 +    skb->nh.iph->daddr    = tunnel->key.addr.u.ip4.s_addr; // Destination address.
  27.170 +    skb->nh.iph->check    = 0;			// Zero the checksum.
  27.171  
  27.172      // Ethernet header will be filled-in by device.
  27.173      err = Tunnel_send(tunnel->base, skb);
  27.174      skb = NULL;
  27.175    exit:
  27.176 -    if(err && skb) dev_kfree_skb(skb);
  27.177 -    //dprintf("< err=%d\n", err);
  27.178 +    if(err && skb){
  27.179 +        wprintf("< err=%d\n", err);
  27.180 +        kfree_skb(skb);
  27.181 +    }
  27.182      return err;
  27.183  }
  27.184  
  27.185 @@ -175,73 +215,59 @@ static TunnelType _etherip_tunnel_type =
  27.186  
  27.187  TunnelType *etherip_tunnel_type = &_etherip_tunnel_type;
  27.188  
  27.189 -/* Defeat compiler warnings about unused functions. */
  27.190 -static void print_str(char *s, int n) __attribute__((unused));
  27.191 -
  27.192 -static void print_str(char *s, int n) {
  27.193 -    int i;
  27.194 -
  27.195 -    for(i=0; i<n; s++, i++){
  27.196 -        if(i && i % 40 == 0) printk("\n");
  27.197 -        if(('a'<= *s && *s <= 'z') ||
  27.198 -           ('A'<= *s && *s <= 'Z') ||
  27.199 -           ('0'<= *s && *s <= '9')){
  27.200 -            printk("%c", *s);
  27.201 -        } else {
  27.202 -            printk("<%x>", (unsigned)(0xff & *s));
  27.203 -        }
  27.204 -    }
  27.205 -    printk("\n");
  27.206 +int etherip_tunnel_create(VnetId *vnet, VarpAddr *addr, Tunnel *base, Tunnel **tunnel){
  27.207 +    return Tunnel_create(etherip_tunnel_type, vnet, addr, base, tunnel);
  27.208  }
  27.209  
  27.210  /** Do etherip receive processing.
  27.211 - * Strips etherip header to extract the ethernet frame, sets
  27.212 + * Strips the etherip header to extract the ethernet frame, sets
  27.213   * the vnet from the header and re-receives the frame.
  27.214   *
  27.215 + * Return code 1 means we now own the packet - the caller must not free it.
  27.216 + * Return code < 0 means an error - caller still owns the packet.
  27.217 + *
  27.218   * @param skb packet
  27.219 - * @return 0 on success, error code otherwise
  27.220 + * @return 1 on success, error code otherwise
  27.221   */
  27.222 -static int etherip_protocol_recv(struct sk_buff *skb){
  27.223 +int etherip_protocol_recv(struct sk_buff *skb){
  27.224      int err = 0;
  27.225 -    int mine = 0;
  27.226 -    const int eth_n = ETH_HLEN;
  27.227 -    int ip_n;
  27.228      const int etherip_n = sizeof(struct etheriphdr);
  27.229      struct etheriphdr *etheriph;
  27.230 -    struct ethhdr *ethhdr;
  27.231      Vnet *vinfo = NULL;
  27.232      VnetId vnet = {};
  27.233      u32 saddr, daddr;
  27.234      char vnetbuf[VNET_ID_BUF];
  27.235 +    struct ethhdr *eth;
  27.236  
  27.237 +    dprintf(">\n");
  27.238      saddr = skb->nh.iph->saddr;
  27.239      daddr = skb->nh.iph->daddr;
  27.240 -    ethhdr = eth_hdr(skb);
  27.241      if(MULTICAST(daddr) && (daddr != varp_mcast_addr)){
  27.242          // Ignore multicast packets not addressed to us.
  27.243 -        dprintf("> Ignoring mcast skb: src=%u.%u.%u.%u dst=%u.%u.%u.%u"
  27.244 +        wprintf("> Ignoring mcast skb: src=%u.%u.%u.%u dst=%u.%u.%u.%u"
  27.245                  " varp_mcast_addr=%u.%u.%u.%u\n",
  27.246                  NIPQUAD(saddr), NIPQUAD(daddr), NIPQUAD(varp_mcast_addr));
  27.247          goto exit;
  27.248      }
  27.249 -    ip_n = (skb->nh.iph->ihl << 2);
  27.250      if(skb->data == skb->mac.raw){
  27.251          // skb->data points at ethernet header.
  27.252 +        //FIXME: Does this ever happen?
  27.253          //dprintf("> len=%d\n", skb->len);
  27.254 -        if (!pskb_may_pull(skb, eth_n + ip_n)){
  27.255 +        int ip_n = (skb->nh.iph->ihl << 2);
  27.256 +        int pull_n = ETH_HLEN + ip_n;
  27.257 +        if (!pskb_may_pull(skb, pull_n)){
  27.258              wprintf("> Malformed skb (eth+ip) src=%u.%u.%u.%u\n",
  27.259                      NIPQUAD(saddr));
  27.260              err = -EINVAL;
  27.261              goto exit;
  27.262          }
  27.263 -        skb_pull(skb, eth_n + ip_n);
  27.264 +        skb_pull(skb, pull_n);
  27.265      }
  27.266      // Assume skb->data points at etherip header.
  27.267      etheriph = (void*)skb->data;
  27.268      if(etheriph->version != ETHERIP_VERSION){
  27.269          wprintf("> Bad etherip version=%d src=%u.%u.%u.%u\n",
  27.270 -                etheriph->version,
  27.271 -                NIPQUAD(saddr));
  27.272 +                etheriph->version, NIPQUAD(saddr));
  27.273          err = -EINVAL;
  27.274          goto exit;
  27.275      }
  27.276 @@ -252,106 +278,82 @@ static int etherip_protocol_recv(struct 
  27.277          goto exit;
  27.278      }
  27.279      etheriphdr_get_vnet(etheriph, &vnet);
  27.280 -    dprintf("> Rcvd skb vnet=%s src=%u.%u.%u.%u\n",
  27.281 -            VnetId_ntoa(&vnet, vnetbuf),
  27.282 -            NIPQUAD(saddr));
  27.283      // If vnet is secure, context must include IPSEC ESP.
  27.284      err = vnet_check_context(&vnet, SKB_CONTEXT(skb), &vinfo);
  27.285 -    Vnet_decref(vinfo);
  27.286      if(err){
  27.287          wprintf("> Failed security check vnet=%s src=%u.%u.%u.%u\n",
  27.288 -                VnetId_ntoa(&vnet, vnetbuf),
  27.289 -                NIPQUAD(saddr));
  27.290 +                VnetId_ntoa(&vnet, vnetbuf), NIPQUAD(saddr));
  27.291          goto exit;
  27.292      }
  27.293 -    mine = 1;
  27.294      // Point at the headers in the contained ethernet frame.
  27.295      skb->mac.raw = skb_pull(skb, etherip_n);
  27.296 +    eth = eth_hdr(skb);
  27.297  
  27.298 -    // Know source ip, vnet, vmac, so could update varp cache.
  27.299 -    // But if traffic comes to us over a vnetd tunnel this points the coa
  27.300 -    // at the vnetd rather than the endpoint. So don't do it.
  27.301 -    //varp_update(vnet, eth_hdr(skb)->h_source, skb->nh.iph->saddr);
  27.302 -
  27.303 +    // Simulate the logic from eth_type_trans()
  27.304 +    // to set skb->pkt_type and skb->protocol.
  27.305 +    if(mac_is_multicast(eth->h_dest)){
  27.306 +        if(mac_is_broadcast(eth->h_dest)){
  27.307 +            skb->pkt_type = PACKET_BROADCAST;
  27.308 +        } else {
  27.309 +            skb->pkt_type = PACKET_MULTICAST;
  27.310 +        }
  27.311 +    } else {
  27.312 +        skb->pkt_type = PACKET_HOST;
  27.313 +    }
  27.314 +    if(ntohs(eth->h_proto) >= 1536){
  27.315 +        skb->protocol = eth->h_proto;
  27.316 +    } else {
  27.317 +        skb->protocol = htons(ETH_P_802_2);
  27.318 +    }
  27.319 +    
  27.320      // Assuming a standard Ethernet frame.
  27.321      // Should check for protocol? Support ETH_P_8021Q too.
  27.322      skb->nh.raw = skb_pull(skb, ETH_HLEN);
  27.323  
  27.324 -    dprintf("> Unpacked vnet=%s srcmac=" MACFMT " dstmac=" MACFMT "\n",
  27.325 -            VnetId_ntoa(&vnet, vnetbuf),
  27.326 -            MAC6TUPLE(eth_hdr(skb)->h_source),
  27.327 -            MAC6TUPLE(eth_hdr(skb)->h_dest));
  27.328 -
  27.329 -#ifdef CONFIG_NETFILTER
  27.330 -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
  27.331 -    // This stops our new pkt header being clobbered by a subsequent
  27.332 -    // call to nf_bridge_maybe_copy_header.
  27.333 -    // Code from nf_bridge_save_header() modidifed to use h_proto
  27.334 -    // instead of skb->protocol.
  27.335 +#ifdef __KERNEL__
  27.336 +    // Fix IP options, checksum, skb dst, netfilter state.
  27.337 +    memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
  27.338 +    if (skb->ip_summed == CHECKSUM_HW){
  27.339 +        skb->ip_summed = CHECKSUM_NONE;
  27.340 +    }
  27.341 +    dst_release(skb->dst);
  27.342 +    skb->dst = NULL;
  27.343 +    nf_reset(skb);
  27.344 +#ifdef CONFIG_BRIDGE_NETFILTER
  27.345 +    // Stop the eth header being clobbered by nf_bridge_maybe_copy_header().
  27.346 +    // We were previously using a copy of it modified to use h_proto instead of skb->protocol.
  27.347      if(skb->nf_bridge){
  27.348 -        // Hmm. Standard ethernet header is ETH_HLEN (14),
  27.349 -        // VLAN header (802.1q) is VLAN_ETH_HLEN (18).
  27.350 -        // Where does 16 come from?
  27.351 -        int header_size = 16;
  27.352 -        if(eth_hdr(skb)->h_proto == __constant_htons(ETH_P_8021Q)) {
  27.353 -            header_size = 18;
  27.354 -        }
  27.355 -        memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
  27.356 +        nf_bridge_save_header(skb);
  27.357      }
  27.358  #endif
  27.359 -#endif
  27.360 -    
  27.361 -    if(1){
  27.362 -	struct ethhdr *eth = eth_hdr(skb);
  27.363 -        // Devices use eth_type_trans() to set skb->pkt_type and skb->protocol.
  27.364 -        // Set them from contained ethhdr, or leave as received?
  27.365 -        // 'Ware use of hard_header_len in eth_type_trans().
  27.366 -
  27.367 -        //skb->protocol = htons(ETH_P_IP);
  27.368 +#endif // __KERNEL__
  27.369  
  27.370 -        if(ntohs(eth->h_proto) >= 1536){
  27.371 -            skb->protocol = eth->h_proto;
  27.372 -        } else {
  27.373 -            skb->protocol = htons(ETH_P_802_2);
  27.374 -        }
  27.375 -        
  27.376 -	if(mac_is_multicast(eth->h_dest)){
  27.377 -            if(mac_is_broadcast(eth->h_dest)){
  27.378 -                skb->pkt_type = PACKET_BROADCAST;
  27.379 -	    } else {
  27.380 -                skb->pkt_type = PACKET_MULTICAST;
  27.381 -            }
  27.382 -        } else {
  27.383 -            skb->pkt_type = PACKET_HOST;
  27.384 -	}
  27.385 +    dprintf("> Unpacked srcaddr=" IPFMT " vnet=%s srcmac=" MACFMT " dstmac=" MACFMT "\n",
  27.386 +            NIPQUAD(skb->nh.iph->saddr),
  27.387 +            VnetId_ntoa(&vnet, vnetbuf),
  27.388 +            MAC6TUPLE(eth->h_source),
  27.389 +            MAC6TUPLE(eth->h_dest));
  27.390 +    //print_skb(__FUNCTION__, 0, skb);
  27.391  
  27.392 -        memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
  27.393 -        if (skb->ip_summed == CHECKSUM_HW){
  27.394 -            skb->ip_summed = CHECKSUM_NONE;
  27.395 -            //skb->csum = csum_sub(skb->csum,
  27.396 -            //                     csum_partial(skb->mac.raw, skb->nh.raw - skb->mac.raw, 0));
  27.397 -        }
  27.398 -        dst_release(skb->dst);
  27.399 -        skb->dst = NULL;
  27.400 -
  27.401 -#ifdef CONFIG_NETFILTER
  27.402 -        nf_conntrack_put(skb->nfct);
  27.403 -        skb->nfct = NULL;
  27.404 -#ifdef CONFIG_NETFILTER_DEBUG
  27.405 -        skb->nf_debug = 0;
  27.406 -#endif
  27.407 -#endif
  27.408 +    {
  27.409 +        // Know source ip, vnet, vmac, so update the varp cache.
  27.410 +        // For this to work forwarded vnet packets must have the
  27.411 +        // original source address.
  27.412 +        VarpAddr addr = { .family = AF_INET };
  27.413 +        addr.u.ip4.s_addr = saddr;
  27.414 +        varp_update(&vnet, eth->h_source, &addr);
  27.415      }
  27.416  
  27.417 -    //print_skb_data(__FUNCTION__, 0, skb, skb->mac.raw, skb->len + ETH_HLEN);
  27.418 -
  27.419 -    err = vnet_skb_recv(skb, &vnet, (Vmac*)eth_hdr(skb)->h_dest);
  27.420 +    err = vnet_skb_recv(skb, vinfo);
  27.421    exit:
  27.422 -    if(mine) err = 1;
  27.423 +    if(vinfo) Vnet_decref(vinfo);
  27.424      dprintf("< skb=%p err=%d\n", skb, err);
  27.425      return err;
  27.426  }
  27.427  
  27.428 +
  27.429 +#ifdef __KERNEL__
  27.430 +
  27.431  /** Handle an ICMP error related to etherip.
  27.432   *
  27.433   * @param skb ICMP error packet
  27.434 @@ -433,3 +435,5 @@ void __exit etherip_module_exit(void) {
  27.435          printk(KERN_INFO "%s: can't remove etherip protocol\n", __FUNCTION__);
  27.436      }
  27.437  }
  27.438 +
  27.439 +#endif // __KERNEL__
    28.1 --- a/tools/vnet/vnet-module/etherip.h	Thu Feb 09 16:09:00 2006 +0100
    28.2 +++ b/tools/vnet/vnet-module/etherip.h	Thu Feb 09 16:12:11 2006 +0100
    28.3 @@ -1,5 +1,5 @@
    28.4  /*
    28.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    28.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    28.7   *
    28.8   * This program is free software; you can redistribute it and/or modify
    28.9   * it under the terms of the GNU General Public License as published by the 
   28.10 @@ -21,7 +21,18 @@
   28.11  
   28.12  #include "if_etherip.h"
   28.13  
   28.14 +#ifdef __KERNEL__
   28.15  extern int etherip_module_init(void);
   28.16  extern void etherip_module_exit(void);
   28.17 +#endif
   28.18  
   28.19 +extern int etherip_protocol_recv(struct sk_buff *skb);
   28.20 +extern int etherip_in_udp;
   28.21 +
   28.22 +struct VnetId;
   28.23 +struct VarpAddr;
   28.24 +struct Tunnel;
   28.25 +
   28.26 +extern int etherip_tunnel_create(struct VnetId *vnet, struct VarpAddr *addr,
   28.27 +                                 struct Tunnel *base, struct Tunnel **tunnel);
   28.28  #endif
    29.1 --- a/tools/vnet/vnet-module/if_etherip.h	Thu Feb 09 16:09:00 2006 +0100
    29.2 +++ b/tools/vnet/vnet-module/if_etherip.h	Thu Feb 09 16:12:11 2006 +0100
    29.3 @@ -1,5 +1,5 @@
    29.4  /*
    29.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    29.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    29.7   *
    29.8   * This program is free software; you can redistribute it and/or modify
    29.9   * it under the terms of the GNU General Public License as published by the 
   29.10 @@ -19,10 +19,26 @@
   29.11  #ifndef _VNET_IF_ETHERIP_H_
   29.12  #define _VNET_IF_ETHERIP_H_
   29.13  
   29.14 +#ifdef __KERNEL__
   29.15 +#include <asm/byteorder.h>
   29.16 +#else
   29.17 +#define __KERNEL__
   29.18 +/* This include may cause a compile warning, which can be ignored.
   29.19 + * Can't use <endian.h> because it doesn't define 
   29.20 + * __LITTLE_ENDIAN_BITFIELD or __BIG_ENDIAN_BITFIELD.
   29.21 + */
   29.22 +#include <asm/byteorder.h>
   29.23 +#undef __KERNEL__
   29.24 +#endif
   29.25 +
   29.26 +#include <if_varp.h>
   29.27 +
   29.28  #define CONFIG_ETHERIP_EXT
   29.29  
   29.30  #ifdef CONFIG_ETHERIP_EXT
   29.31  
   29.32 +/* Extended header with room for a longer vnet id. */
   29.33 +
   29.34  #define ETHERIP_VERSION 4
   29.35  
   29.36  struct etheriphdr {
   29.37 @@ -33,13 +49,15 @@ struct etheriphdr {
   29.38      __u16    version:4,
   29.39              reserved:12;
   29.40  #else
   29.41 -#error  "Please fix <asm/byteorder.h>"
   29.42 +#error  "Adjust your <asm/byteorder.h> defines"
   29.43  #endif
   29.44 -    __u8 vnet[16];
   29.45 +    __u8 vnet[VNETID_SIZE8];
   29.46  } __attribute__ ((packed));
   29.47  
   29.48  #else
   29.49  
   29.50 +/* Original header as in Etherip RFC. */
   29.51 +
   29.52  #define ETHERIP_VERSION 3
   29.53  
   29.54  struct etheriphdr
   29.55 @@ -51,7 +69,7 @@ struct etheriphdr
   29.56      __u16    version:4,
   29.57              reserved:12;
   29.58  #else
   29.59 -#error  "Please fix <asm/byteorder.h>"
   29.60 +#error  "Adjust your <asm/byteorder.h> defines"
   29.61  #endif
   29.62  
   29.63  };
    30.1 --- a/tools/vnet/vnet-module/if_varp.h	Thu Feb 09 16:09:00 2006 +0100
    30.2 +++ b/tools/vnet/vnet-module/if_varp.h	Thu Feb 09 16:12:11 2006 +0100
    30.3 @@ -1,5 +1,5 @@
    30.4  /*
    30.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    30.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    30.7   *
    30.8   * This program is free software; you can redistribute it and/or modify
    30.9   * it under the terms of the GNU General Public License as published by the 
   30.10 @@ -22,27 +22,54 @@
   30.11  
   30.12  /* Need struct in_addr, struct in6_addr. */
   30.13  #ifdef __KERNEL__
   30.14 +
   30.15  #include <linux/in.h>
   30.16  #include <linux/in6.h>
   30.17 +
   30.18  #else
   30.19 +
   30.20 +#include <sys/socket.h>
   30.21  #include <netinet/in.h>
   30.22 +
   30.23  #endif
   30.24  
   30.25 +#include <linux/if_ether.h>
   30.26 +
   30.27  typedef struct Vmac {
   30.28      unsigned char mac[ETH_ALEN];
   30.29  } Vmac;
   30.30  
   30.31  enum {
   30.32 +    /* Varp protocol messages.
   30.33 +     * Format is defined by struct VarpHdr.
   30.34 +     */
   30.35      VARP_ID          = 1,
   30.36 +
   30.37 +    /* Vnet ethernet-in-UDP messages.
   30.38 +     * Format is uint16_t id (VUDP_ID), then
   30.39 +     * struct etheriphdr.
   30.40 +     */
   30.41 +    VUDP_ID          = 2,
   30.42 +
   30.43 +    /* Forwarded messages.
   30.44 +     */
   30.45 +    VFWD_ID          = 3,
   30.46 +
   30.47 +    /* Varp request. */
   30.48      VARP_OP_REQUEST  = 1,
   30.49 +    /* Varp announce. */
   30.50      VARP_OP_ANNOUNCE = 2,
   30.51  };
   30.52  
   30.53 +#define VNETID_SIZE8  16
   30.54 +#define VNETID_SIZE16 (VNETID_SIZE8 >> 1)
   30.55 +#define VNETID_SIZE32 (VNETID_SIZE8 >> 2)
   30.56 +
   30.57  typedef struct VnetId {
   30.58      union {
   30.59 -        uint8_t vnet8[16];
   30.60 -        uint16_t vnet16[8];
   30.61 -        uint32_t vnet32[4];
   30.62 +        uint8_t  vnet8[VNETID_SIZE8];
   30.63 +        uint16_t vnet16[VNETID_SIZE16];
   30.64 +        uint32_t vnet32[VNETID_SIZE32];
   30.65      } u;
   30.66  } __attribute__((packed)) VnetId;
   30.67  
   30.68 @@ -53,6 +80,7 @@ typedef struct VarpAddr {
   30.69          struct in_addr ip4;
   30.70          struct in6_addr ip6;
   30.71      } u;
   30.72 +    //uint16_t port;
   30.73  } __attribute__((packed)) VarpAddr;
   30.74  
   30.75  typedef struct VnetMsgHdr {
    31.1 --- a/tools/vnet/vnet-module/random.c	Thu Feb 09 16:09:00 2006 +0100
    31.2 +++ b/tools/vnet/vnet-module/random.c	Thu Feb 09 16:12:11 2006 +0100
    31.3 @@ -1,5 +1,5 @@
    31.4  /*
    31.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    31.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    31.7   *
    31.8   * This program is free software; you can redistribute it and/or modify
    31.9   * it under the terms of the GNU General Public License as published by the 
   31.10 @@ -38,20 +38,28 @@
   31.11  static unsigned long seed = 0;
   31.12  static unsigned long count = 0;
   31.13  
   31.14 -static unsigned long stir(unsigned long *a, unsigned long b){
   31.15 -    pseudo_des(a, &b);
   31.16 -    return b;
   31.17 -}    
   31.18 +/** Contribute some random bytes.
   31.19 + *
   31.20 + * @param src bytes to contribute
   31.21 + * @param src_n number of bytes
   31.22 + */
   31.23 +void add_random_bytes(const void *src, int src_n){
   31.24 +    ++count;
   31.25 +    seed = hash_hvoid(seed, &count, sizeof(count));
   31.26 +    seed = hash_hvoid(seed, src, src_n);
   31.27 +}
   31.28  
   31.29  /** Get one random byte.
   31.30   *
   31.31   * @return random byte
   31.32   */
   31.33  int get_random_byte(void){
   31.34 -    return stir(&seed, ++count);
   31.35 +    int tmp = jiffies;
   31.36 +    add_random_bytes(&tmp, sizeof(tmp));
   31.37 +    return seed;
   31.38  }
   31.39  
   31.40 -#if 0
   31.41 +#ifndef __KERNEL__
   31.42  /* Get some random bytes.
   31.43   *
   31.44   * @param dst destination for the bytes
   31.45 @@ -66,33 +74,11 @@ void get_random_bytes(void *dst, int dst
   31.46  }
   31.47  #endif
   31.48  
   31.49 -/** Contribute a random byte.
   31.50 - *
   31.51 - * @param b byte to contribute
   31.52 - */
   31.53 -void add_random_byte(int b){
   31.54 -    stir(&seed, ++count);
   31.55 -    stir(&seed, b);
   31.56 -}
   31.57 -
   31.58 -/** Contribute some random bytes.
   31.59 - *
   31.60 - * @param src bytes to contribute
   31.61 - * @param src_n number of bytes
   31.62 - */
   31.63 -void add_random_bytes(const void *src, int src_n){
   31.64 -    int i;
   31.65 -    char *p = (char *)src;
   31.66 -    for(i = 0; i < src_n; i++){
   31.67 -        add_random_byte(*p++);
   31.68 -    }
   31.69 -}
   31.70 -
   31.71  int __init random_module_init(void){
   31.72      int dummy;
   31.73      int tmp = jiffies;
   31.74      seed = (unsigned long)&dummy;
   31.75 -    add_random_byte(tmp);
   31.76 +    add_random_bytes(&tmp, sizeof(tmp));
   31.77      return 0;
   31.78  }
   31.79  
    32.1 --- a/tools/vnet/vnet-module/random.h	Thu Feb 09 16:09:00 2006 +0100
    32.2 +++ b/tools/vnet/vnet-module/random.h	Thu Feb 09 16:12:11 2006 +0100
    32.3 @@ -1,5 +1,5 @@
    32.4  /*
    32.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    32.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    32.7   *
    32.8   * This program is free software; you can redistribute it and/or modify
    32.9   * it under the terms of the GNU General Public License as published by the 
   32.10 @@ -19,9 +19,7 @@
   32.11  #ifndef __VNET_RANDOM_H__
   32.12  #define __VNET_RANDOM_H__
   32.13  
   32.14 -extern int get_random_byte(void);
   32.15  extern void get_random_bytes(void *dst, int dst_n);
   32.16 -extern void add_random_byte(int b);
   32.17  extern void add_random_bytes(const void *src, int src_n);
   32.18  
   32.19  extern int random_module_init(void);
    33.1 --- a/tools/vnet/vnet-module/sa.c	Thu Feb 09 16:09:00 2006 +0100
    33.2 +++ b/tools/vnet/vnet-module/sa.c	Thu Feb 09 16:12:11 2006 +0100
    33.3 @@ -19,17 +19,11 @@
    33.4  #include <linux/config.h>
    33.5  #include <linux/kernel.h>
    33.6  
    33.7 -#include <net/ip.h>
    33.8 -#include <net/protocol.h>
    33.9 -#include <net/route.h>
   33.10 -#include <linux/skbuff.h>
   33.11 -
   33.12 -#include <linux/in.h>
   33.13 -#include <linux/inet.h>
   33.14 -#include <linux/netdevice.h>
   33.15 -
   33.16 +#include <tunnel.h>
   33.17 +#include <vnet.h>
   33.18  #include <sa.h>
   33.19  #include <sa_algorithm.h>
   33.20 +
   33.21  #include "hash_table.h"
   33.22  #include "allocate.h"
   33.23  
   33.24 @@ -120,58 +114,46 @@ static int sa_key_check(SAKey *key, enum
   33.25  
   33.26  static unsigned long sa_spi_counter = 0;
   33.27  
   33.28 +/** Mangle some input to generate output.
   33.29 + * This is used to derive spis and keying material from secrets,
   33.30 + * so it probably ought to be cryptographically strong.
   33.31 + * Probably ought to use a good hash (sha1) or cipher (aes).
   33.32 + *
   33.33 + * @param input input bytes
   33.34 + * @param n number of bytes
   33.35 + * @return mangled value
   33.36 + */
   33.37 +static u32 mangle(void *input, int n){
   33.38 +    return hash_hvoid(0, input, n);
   33.39 +}
   33.40 +
   33.41  /** Generate a random spi.
   33.42   * Uses a hashed counter.
   33.43   *
   33.44   * @return spi
   33.45   */
   33.46  static u32 random_spi(void){
   33.47 -    unsigned long left, right = 0;
   33.48      u32 spi;
   33.49      do{
   33.50 -        left = sa_spi_counter++;
   33.51 -        pseudo_des(&left, &right);
   33.52 -        spi = right;
   33.53 +        spi = sa_spi_counter++;
   33.54 +        spi = mangle(&spi, sizeof(spi));
   33.55      } while(!spi);
   33.56      return spi;
   33.57  }
   33.58  
   33.59 -/** Mangle some input to generate output.
   33.60 - * This is used to derive spis and keying material from secrets,
   33.61 - * so it probably ought to be cryptographically strong.
   33.62 - * Probably ought to use a good hash (sha1) or cipher (aes).
   33.63 - *
   33.64 - * @param input input values
   33.65 - * @param n number of values
   33.66 - * @return mangled value
   33.67 - */
   33.68 -static u32 mangle(u32 input[], int n){
   33.69 -    unsigned long left = 0, right = 0;
   33.70 -    int i;
   33.71 -    for(i=0; i<n; i++){
   33.72 -        left ^= input[i];
   33.73 -        pseudo_des(&left, &right);
   33.74 -    }
   33.75 -    return (u32)right;
   33.76 -}
   33.77 -
   33.78 -/** Generate a spi for a given protocol and address, using a secret key.
   33.79 - * The offset is used when it is necessary to generate more than one spi
   33.80 - * for the same protocol and address.
   33.81 - *
   33.82 - * @param key key
   33.83 - * @param offset offset
   33.84 - * @param protocol protocol
   33.85 - * @param addr IP address
   33.86 - * @return spi
   33.87 - */
   33.88 + /** Generate a spi for a given protocol and address, using a secret key.
   33.89 +  * The offset is used when it is necessary to generate more than one spi
   33.90 +  * for the same protocol and address.
   33.91 +  *
   33.92 +  * @param key key
   33.93 +  * @param offset offset
   33.94 +  * @param protocol protocol
   33.95 +  * @param addr IP address
   33.96 +  * @return spi
   33.97 +  */
   33.98  static u32 generate_spi(u32 key, u32 offset, u32 protocol, u32 addr){
   33.99      u32 input[] = { key, offset, protocol, addr };
  33.100 -    u32 spi;
  33.101 -    dprintf(">\n");
  33.102 -    spi = mangle(input, 4);
  33.103 -    dprintf("< spi=%x\n", spi);
  33.104 -    return spi;
  33.105 +    return mangle(input, sizeof(input));
  33.106  }
  33.107  
  33.108  /** Generate keying material for a given spi, based on a
  33.109 @@ -184,7 +166,7 @@ static u32 generate_spi(u32 key, u32 off
  33.110   */
  33.111  static u32 generate_key(u32 key, u32 offset, u32 spi){
  33.112      u32 input[] = { key, offset, spi };
  33.113 -    return mangle(input, 3);
  33.114 +    return mangle(input, sizeof(input));
  33.115  }    
  33.116  
  33.117  /** Allocate a spi.
  33.118 @@ -238,7 +220,7 @@ static u32 sa_id = 1;
  33.119   * @return hashcode
  33.120   */
  33.121  static inline Hashcode sa_table_hash_id(u32 id){
  33.122 -    return hash_ul(id);
  33.123 +    return hash_hvoid(0, &id, sizeof(id));
  33.124  }
  33.125  
  33.126  /** Hash SA spi/protocol/addr.
  33.127 @@ -249,10 +231,8 @@ static inline Hashcode sa_table_hash_id(
  33.128   * @return hashcode
  33.129   */
  33.130  static inline Hashcode sa_table_hash_spi(u32 spi, u32 protocol, u32 addr){
  33.131 -    Hashcode h = 0;
  33.132 -    h = hash_2ul(spi, protocol);
  33.133 -    h = hash_hul(h, addr);
  33.134 -    return h;
  33.135 +    u32 a[] = { spi, protocol, addr };
  33.136 +    return hash_hvoid(0, a, sizeof(a));
  33.137  }
  33.138  
  33.139  /** Test if an SA entry has a given value.
  33.140 @@ -299,7 +279,7 @@ static int sa_table_spi_fn(TableArg arg,
  33.141   * @param table containing table
  33.142   * @param entry to free
  33.143   */
  33.144 -void sa_table_free_fn(HashTable *table, HTEntry *entry){
  33.145 +static void sa_table_free_fn(HashTable *table, HTEntry *entry){
  33.146      if(!entry) return;
  33.147      if(entry->value){
  33.148          SAState *state = entry->value;
  33.149 @@ -668,3 +648,110 @@ int sa_delete(int id){
  33.150    exit:
  33.151      return err;
  33.152  }
  33.153 +/** Determine ESP security mode for a new SA.
  33.154 + *
  33.155 + * @param spi incoming spi
  33.156 + * @param protocol incoming protocol
  33.157 + * @param addr source address
  33.158 + * @return security level or negative error code
  33.159 + *
  33.160 + * @todo Need to check spi, and do some lookup for security params.
  33.161 + */
  33.162 +int vnet_sa_security(u32 spi, int protocol, u32 addr){
  33.163 +    extern int vnet_security_default;
  33.164 +    int security = vnet_security_default;
  33.165 +    dprintf("< security=%x\n", security);
  33.166 +    return security;
  33.167 +}
  33.168 +
  33.169 +/** Create a new SA for incoming traffic.
  33.170 + *
  33.171 + * @param spi incoming spi
  33.172 + * @param protocol incoming protocol
  33.173 + * @param addr source address
  33.174 + * @param sa return parameter for SA
  33.175 + * @return 0 on success, error code otherwise
  33.176 + */
  33.177 +int vnet_sa_create(u32 spi, int protocol, u32 addr, SAState **sa){
  33.178 +    int err = 0;
  33.179 +    int security = vnet_sa_security(spi, protocol, addr);
  33.180 +    if(security < 0){
  33.181 +        err = security;
  33.182 +        goto exit;
  33.183 +    }
  33.184 +    err = sa_create(security, spi, protocol, addr, sa);
  33.185 +  exit:
  33.186 +    return err;
  33.187 +}
  33.188 +/** Open function for SA tunnels.
  33.189 + *
  33.190 + * @param tunnel to open
  33.191 + * @return 0 on success, error code otherwise
  33.192 + */
  33.193 +static int sa_tunnel_open(Tunnel *tunnel){
  33.194 +    int err = 0;
  33.195 +    //dprintf(">\n");
  33.196 +    //dprintf("< err=%d\n", err);
  33.197 +    return err;
  33.198 +}
  33.199 +
  33.200 +/** Close function for SA tunnels.
  33.201 + *
  33.202 + * @param tunnel to close (OK if null)
  33.203 + */
  33.204 +static void sa_tunnel_close(Tunnel *tunnel){
  33.205 +    SAState *sa;
  33.206 +    if(!tunnel) return;
  33.207 +    sa = tunnel->data;
  33.208 +    if(!sa) return;
  33.209 +    SAState_decref(sa);
  33.210 +    tunnel->data = NULL;
  33.211 +}
  33.212 +
  33.213 +/** Packet send function for SA tunnels.
  33.214 + *
  33.215 + * @param tunnel to send on
  33.216 + * @param skb packet to send
  33.217 + * @return 0 on success, negative error code on error
  33.218 + */
  33.219 +static int sa_tunnel_send(Tunnel *tunnel, struct sk_buff *skb){
  33.220 +    int err = -EINVAL;
  33.221 +    SAState *sa;
  33.222 +    if(!tunnel){
  33.223 +        wprintf("> Null tunnel!\n");
  33.224 +        goto exit;
  33.225 +    }
  33.226 +    sa = tunnel->data;
  33.227 +    if(!sa){
  33.228 +        wprintf("> Null SA!\n");
  33.229 +        goto exit;
  33.230 +    }
  33.231 +    err = SAState_send(sa, skb, tunnel->base);
  33.232 +  exit:
  33.233 +    return err;
  33.234 +}
  33.235 +
  33.236 +/** Functions used by SA tunnels. */
  33.237 +static TunnelType _sa_tunnel_type = {
  33.238 +    .name	= "SA",
  33.239 +    .open	= sa_tunnel_open,
  33.240 +    .close	= sa_tunnel_close,
  33.241 +    .send 	= sa_tunnel_send
  33.242 +};
  33.243 +
  33.244 +/** Exported pointer to the SA tunnel type defined above. */
  33.245 +TunnelType *sa_tunnel_type = &_sa_tunnel_type;
  33.246 +
  33.247 +int sa_tunnel_create(Vnet *info, VarpAddr *addr, Tunnel *base, Tunnel **tunnel){
  33.248 +    int err = 0;
  33.249 +    SAState *sa = NULL;
  33.250 +    //FIXME: Assuming IPv4 for now.
  33.251 +    u32 ipaddr = addr->u.ip4.s_addr;
  33.252 +    err = Tunnel_create(sa_tunnel_type, &info->vnet, addr, base, tunnel);
  33.253 +    if(err) goto exit;
  33.254 +    err = sa_create(info->security, 0, IPPROTO_ESP, ipaddr, &sa);
  33.255 +    if(err) goto exit;
  33.256 +    (*tunnel)->data = sa;
  33.257 +  exit:
  33.258 +    return err;
  33.259 +}
    34.1 --- a/tools/vnet/vnet-module/sa.h	Thu Feb 09 16:09:00 2006 +0100
    34.2 +++ b/tools/vnet/vnet-module/sa.h	Thu Feb 09 16:12:11 2006 +0100
    34.3 @@ -19,16 +19,29 @@
    34.4  #ifndef __VNET_SA_H__
    34.5  #define __VNET_SA_H__
    34.6  
    34.7 +#ifdef __KERNEL__
    34.8  #include <linux/types.h>
    34.9  #include <linux/crypto.h>
   34.10  
   34.11 -#include <tunnel.h>
   34.12 +#else
   34.13 +
   34.14 +#include "sys_kernel.h"
   34.15 +
   34.16 +#endif
   34.17 +
   34.18 +struct Vnet;
   34.19 +struct VarpAddr;
   34.20 +struct Tunnel;
   34.21  
   34.22  #ifndef CRYPTO_MAX_KEY_BYTES
   34.23  #define CRYPTO_MAX_KEY_BYTES            64
   34.24  #define CRYPTO_MAX_KEY_BITS             (CRYPTO_MAX_KEY_BYTES * 8)
   34.25  #endif
   34.26  
   34.27 +#ifndef CRYPTO_MAX_ALG_NAME
   34.28 +#define CRYPTO_MAX_ALG_NAME		64
   34.29 +#endif
   34.30 +
   34.31  typedef struct SALimits {
   34.32      u64 bytes_soft;
   34.33      u64 bytes_hard;
   34.34 @@ -104,7 +117,7 @@ typedef struct SAType {
   34.35      int (*init)(SAState *state, void *args);
   34.36      void (*fini)(SAState *state);
   34.37      int (*recv)(SAState *state, struct sk_buff *skb);
   34.38 -    int (*send)(SAState *state, struct sk_buff *skb, Tunnel *tunnel);
   34.39 +    int (*send)(SAState *state, struct sk_buff *skb, struct Tunnel *tunnel);
   34.40      u32 (*size)(SAState *state, int size);
   34.41  } SAType;
   34.42  
   34.43 @@ -170,7 +183,7 @@ extern SAState *SAState_alloc(void);
   34.44  extern int SAState_init(SAIdent *id, SAState **statep);
   34.45  extern int SAState_create(SAInfo *info, SAState **statep);
   34.46  
   34.47 -static inline int SAState_send(SAState *sa, struct sk_buff *skb, Tunnel *tunnel){
   34.48 +static inline int SAState_send(SAState *sa, struct sk_buff *skb, struct Tunnel *tunnel){
   34.49      return sa->type->send(sa, skb, tunnel);
   34.50  }
   34.51  
   34.52 @@ -196,4 +209,7 @@ enum {
   34.53      SA_STATE_VALID   = 2,
   34.54  };
   34.55  
   34.56 +extern int sa_tunnel_create(struct Vnet *info, struct VarpAddr *addr,
   34.57 +                            struct Tunnel *base, struct Tunnel **tunnel);
   34.58 +
   34.59  #endif /* !__VNET_SA_H__ */
    35.1 --- a/tools/vnet/vnet-module/skb_context.h	Thu Feb 09 16:09:00 2006 +0100
    35.2 +++ b/tools/vnet/vnet-module/skb_context.h	Thu Feb 09 16:12:11 2006 +0100
    35.3 @@ -20,11 +20,25 @@
    35.4  #ifndef __VNET_SKB_CONTEXT_H__
    35.5  #define __VNET_SKB_CONTEXT_H__
    35.6  
    35.7 +#ifdef __KERNEL__
    35.8  #include <linux/config.h>
    35.9  #include <linux/kernel.h>
   35.10  #include <asm/atomic.h>
   35.11  #include <linux/types.h>
   35.12  
   35.13 +//todo: fixme
   35.14 +#define SKB_CONTEXT(_skb) ((SkbContext *)(&(_skb)->cb[0]))
   35.15 +
   35.16 +#else
   35.17 +
   35.18 +#include "sys_kernel.h"
   35.19 +#include "spinlock.h"
   35.20 +
   35.21 +//todo: fixme
   35.22 +#define SKB_CONTEXT(_skb) ((SkbContext *)NULL)
   35.23 +
   35.24 +#endif
   35.25 +
   35.26  /** Structure used to record inbound processing path for skbs.
   35.27   * For example, the ETHERIP protocol handler can use this to
   35.28   * tell whether an inbound packet came through IPSEC ESP or not.
   35.29 @@ -70,7 +84,4 @@ struct sk_buff;
   35.30  extern int skb_push_context(struct sk_buff *skb, u32 vnet, u32 addr, int protocol,
   35.31                              void *data, void (*free_fn)(SkbContext *));
   35.32  
   35.33 -//todo: fixme
   35.34 -#define SKB_CONTEXT(_skb) ((SkbContext *)(&(_skb)->cb[0]))
   35.35 -
   35.36  #endif /* !__VNET_SKB_CONTEXT_H__ */ 
    36.1 --- a/tools/vnet/vnet-module/skb_util.c	Thu Feb 09 16:09:00 2006 +0100
    36.2 +++ b/tools/vnet/vnet-module/skb_util.c	Thu Feb 09 16:12:11 2006 +0100
    36.3 @@ -1,5 +1,5 @@
    36.4  /*
    36.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    36.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    36.7   *
    36.8   * This program is free software; you can redistribute it and/or modify
    36.9   * it under the terms of the GNU General Public License as published by the 
   36.10 @@ -16,6 +16,7 @@
   36.11   * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   36.12   *
   36.13   */
   36.14 +#ifdef __KERNEL__
   36.15  #include <linux/config.h>
   36.16  #include <linux/module.h>
   36.17  #include <linux/kernel.h>
   36.18 @@ -39,6 +40,44 @@
   36.19  #include <net/route.h>
   36.20  #include <linux/skbuff.h>
   36.21  
   36.22 +#else
   36.23 +
   36.24 +#include <stdlib.h>
   36.25 +#include <stdbool.h>
   36.26 +#include <stdint.h>
   36.27 +#include <unistd.h>
   36.28 +#include <stdio.h>
   36.29 +#include <errno.h>
   36.30 +
   36.31 +#include <netinet/in.h>
   36.32 +#include <arpa/inet.h>
   36.33 +
   36.34 +#include <sys/types.h>
   36.35 +#include <sys/socket.h>
   36.36 +
   36.37 +#include <linux/if_ether.h>
   36.38 +#include <linux/if_arp.h>
   36.39 +#include <linux/ip.h>
   36.40 +#include <linux/tcp.h>
   36.41 +#include <linux/udp.h>
   36.42 +
   36.43 +#include "sys_kernel.h"
   36.44 +#include "skbuff.h"
   36.45 +
   36.46 +#if defined(__LITTLE_ENDIAN)
   36.47 +#define HIPQUAD(addr) \
   36.48 +	((unsigned char *)&addr)[3], \
   36.49 +	((unsigned char *)&addr)[2], \
   36.50 +	((unsigned char *)&addr)[1], \
   36.51 +	((unsigned char *)&addr)[0]
   36.52 +#elif defined(__BIG_ENDIAN)
   36.53 +#define HIPQUAD	NIPQUAD
   36.54 +#else
   36.55 +#error "Please fix asm/byteorder.h"
   36.56 +#endif /* __LITTLE_ENDIAN */
   36.57 +
   36.58 +#endif
   36.59 +
   36.60  #include <varp.h>
   36.61  #include <skb_util.h>
   36.62  
   36.63 @@ -47,16 +86,7 @@
   36.64  #undef DEBUG
   36.65  #include "debug.h"
   36.66  
   36.67 -static const int DEBUG_SCATTERLIST = 0;
   36.68 -static const int DEBUG_SKB = 0;
   36.69 -
   36.70  //============================================================================
   36.71 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   36.72 -#define SET_SCATTER_ADDR(sg, addr) do{} while(0)
   36.73 -#else
   36.74 -#define SET_SCATTER_ADDR(sg, addr) (sg).address = (addr)
   36.75 -#endif
   36.76 -
   36.77  /** Make enough room in an skb for extra header and trailer.
   36.78   *
   36.79   * @param pskb return parameter for expanded skb
   36.80 @@ -85,7 +115,7 @@ int skb_make_room(struct sk_buff **pskb,
   36.81              err = -ENOMEM;
   36.82              goto exit;
   36.83          }
   36.84 -        dev_kfree_skb(skb);
   36.85 +        kfree_skb(skb);
   36.86          *pskb = new_skb;
   36.87      } else {
   36.88          // No room. Expand. There may be more efficient ways to do
   36.89 @@ -95,7 +125,7 @@ int skb_make_room(struct sk_buff **pskb,
   36.90              err = -ENOMEM;
   36.91              goto exit;
   36.92          }
   36.93 -        dev_kfree_skb(skb);
   36.94 +        kfree_skb(skb);
   36.95          *pskb = new_skb;
   36.96      }
   36.97      dprintf("> skb=%p headroom=%d head_n=%d tailroom=%d tail_n=%d\n",
   36.98 @@ -129,6 +159,7 @@ int skb_put_bits(const struct sk_buff *s
   36.99          src += copy;
  36.100      }
  36.101  
  36.102 +#ifdef __KERNEL__
  36.103      for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  36.104          int end;
  36.105  
  36.106 @@ -177,6 +208,10 @@ int skb_put_bits(const struct sk_buff *s
  36.107              start = end;
  36.108          }
  36.109      }
  36.110 +#else
  36.111 +    i=0;
  36.112 +#endif
  36.113 +
  36.114      if (len == 0)
  36.115          return 0;
  36.116  
  36.117 @@ -184,45 +219,11 @@ int skb_put_bits(const struct sk_buff *s
  36.118      return -EFAULT;
  36.119  }
  36.120  
  36.121 -/** Add some space to the end of a (possibly fragmented) skb.
  36.122 - *
  36.123 - * Only works with Xen output skbs.  Output skbs have 1 frag, and we
  36.124 - * add another frag for the extra space.
  36.125 - *
  36.126 - * @param skb skb
  36.127 - * @param n number of bytes to add
  36.128 - * @return 0 on success, error code otherwise 
  36.129 - *
  36.130 - * @todo fixme
  36.131 - */
  36.132 -int pskb_put(struct sk_buff *skb, int n){
  36.133 -    int err = 0;
  36.134 -    if(1 || skb_is_nonlinear(skb)){
  36.135 -        struct skb_shared_info *info = skb_shinfo(skb);
  36.136 -        char *ptr = NULL;
  36.137 -
  36.138 -        if(info->nr_frags >= MAX_SKB_FRAGS){
  36.139 -            err = -ENOMEM;
  36.140 -            goto exit;
  36.141 -        }
  36.142 -        ptr = kmalloc(n, GFP_ATOMIC);
  36.143 -        if(!ptr){
  36.144 -            err = -ENOMEM;
  36.145 -            goto exit;
  36.146 -        }
  36.147 -        info->nr_frags++;
  36.148 -        info->frags[info->nr_frags - 1].page = virt_to_page(ptr);
  36.149 -        info->frags[info->nr_frags - 1].page_offset = ((unsigned long)ptr & ~PAGE_MASK);
  36.150 -        info->frags[info->nr_frags - 1].size = n;
  36.151 -
  36.152 -        skb->data_len += n;
  36.153 -        skb->len += n;
  36.154 -    } else {
  36.155 -        __skb_put(skb, n);
  36.156 +int skboffset(struct sk_buff *skb, unsigned char *ptr){
  36.157 +    if(!ptr || ptr < skb->head || ptr > skb->tail){
  36.158 +        return -1;
  36.159      }
  36.160 -  exit:
  36.161 -    if(err) dprintf("< err=%d\n", err);
  36.162 -    return err;
  36.163 +    return (ptr - skb->head);
  36.164  }
  36.165  
  36.166  /** Print some bits of an skb.
  36.167 @@ -231,11 +232,23 @@ int pskb_put(struct sk_buff *skb, int n)
  36.168   * @param offset byte offset to start printing at
  36.169   * @param n number of bytes to print
  36.170   */
  36.171 -void skb_print_bits(struct sk_buff *skb, int offset, int n){
  36.172 +void skb_print_bits(const char *msg, struct sk_buff *skb, int offset, int n){
  36.173      int chunk = 16;
  36.174      int i, k;
  36.175      u8 buff[chunk];
  36.176 -    if(!DEBUG_SKB) return;
  36.177 +    if(!skb) return;
  36.178 +    printk("%s> tot=%d len=%d data=%d mac=%d nh=%d h=%d\n",
  36.179 +           msg,
  36.180 +           skb->tail - skb->head,
  36.181 +           skb->len,
  36.182 +           skboffset(skb, skb->data),
  36.183 +           skboffset(skb, skb->mac.raw),
  36.184 +           skboffset(skb, skb->nh.raw),
  36.185 +           skboffset(skb, skb->h.raw));
  36.186 +    printk("%s> head=%p data=%p mac=%p nh=%p h=%p tail=%p\n",
  36.187 +           msg, skb->head, skb->data,
  36.188 +           skb->mac.raw, skb->nh.raw, skb->h.raw,
  36.189 +           skb->tail);
  36.190      while(n){
  36.191          k = (n > chunk ? chunk : n);
  36.192          skb_copy_bits(skb, offset, buff, k);
  36.193 @@ -275,8 +288,15 @@ void *skb_trim_tail(struct sk_buff *skb,
  36.194      return skb->tail;
  36.195  }
  36.196  
  36.197 -// #define BUG_TRAP(x)
  36.198 -// if(!(x)){ printk("KERNEL: assertion (" #x ") failed at " __FILE__ "(%d)\n", __LINE__); }
  36.199 +#ifdef __KERNEL__
  36.200 +
  36.201 +static const int DEBUG_SCATTERLIST = 0;
  36.202 +
  36.203 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  36.204 +#define SET_SCATTER_ADDR(sg, addr) do{} while(0)
  36.205 +#else
  36.206 +#define SET_SCATTER_ADDR(sg, addr) (sg).address = (addr)
  36.207 +#endif
  36.208  
  36.209  /** Convert a (possibly fragmented) skb into a scatter list.
  36.210   *
  36.211 @@ -360,27 +380,9 @@ int skb_scatterlist(struct sk_buff *skb,
  36.212      return err;
  36.213  }
  36.214  
  36.215 -struct arpheader
  36.216 -{
  36.217 -	unsigned short	ar_hrd;		/* format of hardware address	*/
  36.218 -	unsigned short	ar_pro;		/* format of protocol address	*/
  36.219 -	unsigned char	ar_hln;		/* length of hardware address	*/
  36.220 -	unsigned char	ar_pln;		/* length of protocol address	*/
  36.221 -	unsigned short	ar_op;		/* ARP opcode (command)		*/
  36.222 -
  36.223 -#if 1
  36.224 -	 /*
  36.225 -	  *	 Ethernet looks like this : This bit is variable sized however...
  36.226 -	  */
  36.227 -	unsigned char		ar_sha[ETH_ALEN];	/* sender hardware address	*/
  36.228 -	unsigned char		ar_sip[4];		/* sender IP address		*/
  36.229 -	unsigned char		ar_tha[ETH_ALEN];	/* target hardware address	*/
  36.230 -	unsigned char		ar_tip[4];		/* target IP address		*/
  36.231  #endif
  36.232  
  36.233 -};
  36.234 -
  36.235 -void print_skb_data(char *msg, int count, struct sk_buff *skb, u8 *data, int len)
  36.236 +void print_skb_data(const char *msg, int count, struct sk_buff *skb, u8 *data, int len)
  36.237  {
  36.238      static int skb_count = 1000000;
  36.239      u8 *ptr, *end;
  36.240 @@ -460,7 +462,7 @@ void print_skb_data(char *msg, int count
  36.241                     msg, count, nh.iph->protocol,
  36.242                     HIPQUAD(src_addr), HIPQUAD(dst_addr));
  36.243              printk("%s.%d> IP tot_len=%u len=%d\n",
  36.244 -                   msg, count, nh.iph->tot_len & 0xffff, len - ETH_HLEN);
  36.245 +                   msg, count, ntohs(nh.iph->tot_len), len - ETH_HLEN);
  36.246          }
  36.247          ptr += (nh.iph->ihl * 4);
  36.248          if(ptr > end){ printk ("***IP: len"); goto exit; }
  36.249 @@ -506,10 +508,49 @@ void print_skb_data(char *msg, int count
  36.250      return;
  36.251    exit:
  36.252      printk("%s.%d> %s: skb problem\n", msg, count, __FUNCTION__);
  36.253 -    printk("%s.%d> %s: data=%p end=%p(%d) ptr=%p(%d) eth=%d arp=%d ip=%d\n",
  36.254 +    printk("%s.%d> %s: data=%p end=%p(%d) ptr=%p(%d) eth=%d ip=%d\n",
  36.255             msg, count, __FUNCTION__,
  36.256             data, end, end - data, ptr, ptr - data,
  36.257 -           sizeof(struct ethhdr), sizeof(struct arphdr), sizeof(struct iphdr));
  36.258 +           sizeof(struct ethhdr),
  36.259 +           sizeof(struct iphdr));
  36.260      return;
  36.261  }
  36.262  
  36.263 +void print_skb(const char *msg, int count, struct sk_buff *skb){
  36.264 +    print_skb_data(msg, count, skb, skb->mac.raw, skb->tail - skb->mac.raw);
  36.265 +}
  36.266 +
  36.267 +void print_ethhdr(const char *msg, struct sk_buff *skb){
  36.268 +    struct ethhdr *eth;
  36.269 +
  36.270 +    if(!skb || skboffset(skb, skb->mac.raw) < 0) return;
  36.271 +    eth = eth_hdr(skb);
  36.272 +    printk("%s> ETH proto=%d src=" MACFMT " dst=" MACFMT "\n",
  36.273 +           msg,
  36.274 +           ntohs(eth->h_proto),
  36.275 +           MAC6TUPLE(eth->h_source),
  36.276 +           MAC6TUPLE(eth->h_dest));
  36.277 +}
  36.278 +
  36.279 +void print_iphdr(const char *msg, struct sk_buff *skb){
  36.280 +    u32 src_addr, dst_addr;
  36.281 +    
  36.282 +    if(!skb || skboffset(skb, skb->nh.raw) < 0) return;
  36.283 +    src_addr = ntohl(skb->nh.iph->saddr);
  36.284 +    dst_addr = ntohl(skb->nh.iph->daddr);
  36.285 +    printk("%s> IP proto=%d src=" IPFMT " dst=" IPFMT " tot_len=%u\n",
  36.286 +           msg,
  36.287 +           skb->nh.iph->protocol,
  36.288 +           HIPQUAD(src_addr),
  36.289 +           HIPQUAD(dst_addr),
  36.290 +           ntohs(skb->nh.iph->tot_len));
  36.291 +}
  36.292 +
  36.293 +void print_udphdr(const char *msg, struct sk_buff *skb){
  36.294 +    if(!skb || skboffset(skb, skb->h.raw) < 0) return;
  36.295 +    printk("%s> UDP src=%u dst=%u len=%u\n",
  36.296 +           msg,
  36.297 +           ntohs(skb->h.uh->source),
  36.298 +           ntohs(skb->h.uh->dest),
  36.299 +           ntohs(skb->h.uh->len));
  36.300 +}
    37.1 --- a/tools/vnet/vnet-module/skb_util.h	Thu Feb 09 16:09:00 2006 +0100
    37.2 +++ b/tools/vnet/vnet-module/skb_util.h	Thu Feb 09 16:12:11 2006 +0100
    37.3 @@ -1,5 +1,5 @@
    37.4  /*
    37.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    37.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    37.7   *
    37.8   * This program is free software; you can redistribute it and/or modify
    37.9   * it under the terms of the GNU General Public License as published by the 
   37.10 @@ -19,40 +19,62 @@
   37.11  #ifndef _VNET_SKB_UTIL_H_
   37.12  #define _VNET_SKB_UTIL_H_
   37.13  
   37.14 +#ifdef __KERNEL__
   37.15  #include <net/route.h>
   37.16  #include <linux/skbuff.h>
   37.17  
   37.18 -struct scatterlist;
   37.19 +#else
   37.20 +
   37.21 +#include "skbuff.h"
   37.22 +
   37.23 +#endif
   37.24 +
   37.25 +struct sk_buff;
   37.26  
   37.27  extern int skb_make_room(struct sk_buff **pskb, struct sk_buff *skb, int head_n, int tail_n);
   37.28  
   37.29  extern int skb_put_bits(const struct sk_buff *skb, int offset, void *src, int len);
   37.30  
   37.31 -extern int pskb_put(struct sk_buff *skb, int n);
   37.32 -
   37.33 -extern void skb_print_bits(struct sk_buff *skb, int offset, int n);
   37.34 +extern void skb_print_bits(const char *msg, struct sk_buff *skb, int offset, int n);
   37.35  
   37.36  extern void buf_print(char *buf, int n);
   37.37  
   37.38  extern void *skb_trim_tail(struct sk_buff *skb, int n);
   37.39  
   37.40 -extern int skb_scatterlist(struct sk_buff *skb, struct scatterlist *sg,
   37.41 -                           int *sg_n, int offset, int len);
   37.42 +extern void print_skb_data(const char *msg, int count, struct sk_buff *skb, u8 *data, int len);
   37.43 +extern void print_skb(const char *msg, int count, struct sk_buff *skb);
   37.44  
   37.45 -extern void print_skb_data(char *msg, int count, struct sk_buff *skb, u8 *data, int len);
   37.46 -
   37.47 +extern void print_ethhdr(const char *msg, struct sk_buff *skb);
   37.48 +extern void print_iphdr(const char *msg, struct sk_buff *skb);
   37.49 +extern void print_udphdr(const char *msg, struct sk_buff *skb);
   37.50  
   37.51  /* The mac.ethernet field went away in 2.6 in favour of eth_hdr().
   37.52   */
   37.53 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   37.54 +#ifdef __KERNEL__
   37.55 +#  if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
   37.56 +#    define NEED_ETH_HDR
   37.57 +#  endif
   37.58  #else
   37.59 +#  define NEED_ETH_HDR
   37.60 +#endif
   37.61 +
   37.62 +#ifdef NEED_ETH_HDR
   37.63 +
   37.64  static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
   37.65  {
   37.66  	return (struct ethhdr *)skb->mac.raw;
   37.67  }
   37.68 +
   37.69  #endif
   37.70  
   37.71  
   37.72 +#ifdef __KERNEL__
   37.73 +
   37.74 +struct scatterlist;
   37.75 +
   37.76 +extern int skb_scatterlist(struct sk_buff *skb, struct scatterlist *sg,
   37.77 +                           int *sg_n, int offset, int len);
   37.78 +
   37.79  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   37.80  
   37.81  static inline int skb_route(struct sk_buff *skb, struct rtable **prt){
   37.82 @@ -91,4 +113,27 @@ static inline int skb_route(struct sk_bu
   37.83  
   37.84  #endif
   37.85  
   37.86 +#endif /* __KERNEL__ */
   37.87 +
   37.88 +/** Arp header struct with all the fields so we can access them. */
   37.89 +struct arpheader
   37.90 +{
   37.91 +	unsigned short	ar_hrd;		/* format of hardware address	*/
   37.92 +	unsigned short	ar_pro;		/* format of protocol address	*/
   37.93 +	unsigned char	ar_hln;		/* length of hardware address	*/
   37.94 +	unsigned char	ar_pln;		/* length of protocol address	*/
   37.95 +	unsigned short	ar_op;		/* ARP opcode (command)		*/
   37.96 +
   37.97 +#if 1
   37.98 +	 /*
   37.99 +	  *	 Ethernet looks like this : This bit is variable sized however...
  37.100 +	  */
  37.101 +	unsigned char		ar_sha[ETH_ALEN];	/* sender hardware address	*/
  37.102 +	unsigned char		ar_sip[4];		/* sender IP address		*/
  37.103 +	unsigned char		ar_tha[ETH_ALEN];	/* target hardware address	*/
  37.104 +	unsigned char		ar_tip[4];		/* target IP address		*/
  37.105  #endif
  37.106 +
  37.107 +};
  37.108 +
  37.109 +#endif /* ! _VNET_SKB_UTIL_H_ */
    38.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.2 +++ b/tools/vnet/vnet-module/sxpr_util.c	Thu Feb 09 16:12:11 2006 +0100
    38.3 @@ -0,0 +1,119 @@
    38.4 +/*
    38.5 + * Copyright (C) 2005 Mike Wray <mike.wray@hp.com>
    38.6 + *
    38.7 + * This program is free software; you can redistribute it and/or modify
    38.8 + * it under the terms of the GNU General Public License as published by the 
    38.9 + * Free Software Foundation; either version 2 of the License, or (at your
   38.10 + * option) any later version.
   38.11 + * 
   38.12 + * This program is distributed in the hope that it will be useful, but
   38.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   38.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   38.15 + * for more details.
   38.16 + *
   38.17 + * You should have received a copy of the GNU General Public License along
   38.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   38.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   38.20 + *
   38.21 + */
   38.22 +#include "sys_net.h"
   38.23 +#include "if_varp.h"
   38.24 +#include "varp_util.h"
   38.25 +#include "sxpr_util.h"
   38.26 +
   38.27 +int stringof(Sxpr exp, char **s){
   38.28 +    int err = 0;
   38.29 +    if(ATOMP(exp)){
   38.30 +        *s = atom_name(exp);
   38.31 +    } else if(STRINGP(exp)){
   38.32 +        *s = string_string(exp);
   38.33 +    } else {
   38.34 +        err = -EINVAL;
   38.35 +        *s = NULL;
   38.36 +    }
   38.37 +    return err;
   38.38 +}
   38.39 +
   38.40 +int child_string(Sxpr exp, Sxpr key, char **s){
   38.41 +    int err = 0;
   38.42 +    Sxpr val = sxpr_child_value(exp, key, ONONE);
   38.43 +    err = stringof(val, s);
   38.44 +    return err;
   38.45 +}
   38.46 +
   38.47 +int intof(Sxpr exp, int *v){ /* Convert an int object, atom or string sxpr to an int; returns 0 on success. */
   38.48 +    int err = 0;
   38.49 +    char *s;
   38.50 +    unsigned long l = 0; /* Keeps *v well-defined if convert_atoul() fails without storing a value. */
   38.51 +    if(INTP(exp)){
   38.52 +        *v = OBJ_INT(exp);
   38.53 +    } else {
   38.54 +        err = stringof(exp, &s); /* Accepts atoms and strings; anything else is -EINVAL. */
   38.55 +        if(err) goto exit;
   38.56 +        err = convert_atoul(s, &l);
   38.57 +        *v = (int)l;
   38.58 +    }
   38.59 + exit:
   38.60 +    return err;
   38.61 +}
   38.62 +
   38.63 +int child_int(Sxpr exp, Sxpr key, int *v){
   38.64 +    int err = 0;
   38.65 +    Sxpr val = sxpr_child_value(exp, key, ONONE);
   38.66 +    err = intof(val, v);
   38.67 +    return err;
   38.68 +}
   38.69 +
   38.70 +int vnetof(Sxpr exp, VnetId *v){
   38.71 +    int err = 0;
   38.72 +    char *s;
   38.73 +    err = stringof(exp, &s);
   38.74 +    if(err) goto exit;
   38.75 +    err = VnetId_aton(s, v);
   38.76 +  exit:
   38.77 +    return err;
   38.78 +}
   38.79 +
   38.80 +int child_vnet(Sxpr exp, Sxpr key, VnetId *v){
   38.81 +    int err = 0;
   38.82 +    Sxpr val = sxpr_child_value(exp, key, ONONE);
   38.83 +    err = vnetof(val, v);
   38.84 +    return err;
   38.85 +}
   38.86 +
   38.87 +int macof(Sxpr exp, unsigned char *v){
   38.88 +    int err = 0;
   38.89 +    char *s;
   38.90 +    err = stringof(exp, &s);
   38.91 +    if(err) goto exit;
   38.92 +    err = mac_aton(s, v);
   38.93 +  exit:
   38.94 +    return err;
   38.95 +}
   38.96 +
   38.97 +int child_mac(Sxpr exp, Sxpr key, unsigned char *v){
   38.98 +    int err = 0;
   38.99 +    Sxpr val = sxpr_child_value(exp, key, ONONE);
  38.100 +    err = macof(val, v);
  38.101 +    return err;
  38.102 +}
  38.103 +
  38.104 +int addrof(Sxpr exp, uint32_t *v){
  38.105 +    int err = 0;
  38.106 +    char *s;
  38.107 +    unsigned long w;
  38.108 +    err = stringof(exp, &s);
  38.109 +    if(err) goto exit;
  38.110 +    err = get_inet_addr(s, &w);
  38.111 +    if(err) goto exit;
  38.112 +    *v = (uint32_t)w;
  38.113 +  exit:
  38.114 +    return err;
  38.115 +}
  38.116 +
  38.117 +int child_addr(Sxpr exp, Sxpr key, uint32_t *v){
  38.118 +    int err = 0;
  38.119 +    Sxpr val = sxpr_child_value(exp, key, ONONE);
  38.120 +    err = addrof(val, v);
  38.121 +    return err;
  38.122 +}
    39.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.2 +++ b/tools/vnet/vnet-module/sxpr_util.h	Thu Feb 09 16:12:11 2006 +0100
    39.3 @@ -0,0 +1,36 @@
    39.4 +/*
    39.5 + * Copyright (C) 2005 Mike Wray <mike.wray@hp.com>
    39.6 + *
    39.7 + * This program is free software; you can redistribute it and/or modify
    39.8 + * it under the terms of the GNU General Public License as published by the 
    39.9 + * Free Software Foundation; either version 2 of the License, or (at your
   39.10 + * option) any later version.
   39.11 + * 
   39.12 + * This program is distributed in the hope that it will be useful, but
   39.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   39.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   39.15 + * for more details.
   39.16 + *
   39.17 + * You should have received a copy of the GNU General Public License along
   39.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   39.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   39.20 + *
   39.21 + */
   39.22 +#ifndef _SXPR_UTIL_H_
   39.23 +#define _SXPR_UTIL_H_ /* Must match the #ifndef above or the guard never takes effect. */
   39.24 +
   39.25 +#include "sxpr.h"
   39.26 +struct VnetId;
   39.27 +
   39.28 +int stringof(Sxpr exp, char **s); /* Atom name or string value; -EINVAL (and *s = NULL) otherwise. */
   39.29 +int child_string(Sxpr exp, Sxpr key, char **s); /* stringof() applied to the value of child 'key'. */
   39.30 +int intof(Sxpr exp, int *v); /* Int object, or an atom/string parsed with convert_atoul(). */
   39.31 +int child_int(Sxpr exp, Sxpr key, int *v); /* intof() applied to the value of child 'key'. */
   39.32 +int vnetof(Sxpr exp, struct VnetId *v); /* Atom/string parsed with VnetId_aton(). */
   39.33 +int child_vnet(Sxpr exp, Sxpr key, struct VnetId *v); /* vnetof() applied to the value of child 'key'. */
   39.34 +int macof(Sxpr exp, unsigned char *v); /* Atom/string parsed with mac_aton(). */
   39.35 +int child_mac(Sxpr exp, Sxpr key, unsigned char *v); /* macof() applied to the value of child 'key'. */
   39.36 +int addrof(Sxpr exp, uint32_t *v); /* Atom/string resolved with get_inet_addr(). */
   39.37 +int child_addr(Sxpr exp, Sxpr key, uint32_t *v); /* addrof() applied to the value of child 'key'. */
   39.38 +
   39.39 +#endif /* ! _SXPR_UTIL_H_ */
    40.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    40.2 +++ b/tools/vnet/vnet-module/timer_util.c	Thu Feb 09 16:12:11 2006 +0100
    40.3 @@ -0,0 +1,74 @@
    40.4 +/*
    40.5 + * Copyright (C) 2005 Mike Wray <mike.wray@hp.com>
    40.6 + *
    40.7 + * This program is free software; you can redistribute it and/or modify
    40.8 + * it under the terms of the GNU General Public License as published by the 
    40.9 + * Free Software Foundation; either version 2 of the License, or (at your
   40.10 + * option) any later version.
   40.11 + * 
   40.12 + * This program is distributed in the hope that it will be useful, but
   40.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   40.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   40.15 + * for more details.
   40.16 + *
   40.17 + * You should have received a copy of the GNU General Public License along
   40.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   40.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   40.20 + *
   40.21 + */
   40.22 +
   40.23 +#ifdef __KERNEL__
   40.24 +#include <linux/config.h>
   40.25 +#include <linux/kernel.h>
   40.26 +#include <linux/module.h>
   40.27 +#include <linux/init.h>
   40.28 +#include <linux/string.h>
   40.29 +#include <linux/version.h>
   40.30 +
   40.31 +#include <linux/spinlock.h>
   40.32 +#include <asm/semaphore.h>
   40.33 +
   40.34 +#else
   40.35 +
   40.36 +#include "sys_kernel.h"
   40.37 +#include "spinlock.h"
   40.38 +
   40.39 +#endif
   40.40 +
   40.41 +#include "timer_util.h"
   40.42 +
   40.43 +#define MODULE_NAME "TIMER"
   40.44 +#define DEBUG 1
   40.45 +#undef DEBUG
   40.46 +#include "debug.h"
   40.47 +
   40.48 +#ifdef __KERNEL__
   40.49 +
   40.50 +void timer_init(struct timer_list *timer, void (*fn)(unsigned long), void *data){
   40.51 +    init_timer(timer);
   40.52 +    timer->data = (unsigned long)data;
   40.53 +    timer->function = fn;
   40.54 +}
   40.55 +
   40.56 +void timer_set(struct timer_list *timer, unsigned long ttl){ /* (Re)arm timer to expire ttl jiffies from now. */
   40.57 +    unsigned long expires = jiffies + ttl;
   40.58 +    /* mod_timer() also handles a timer that is still pending, which add_timer() must not be given. */
   40.59 +    mod_timer(timer, expires);
   40.60 +}
   40.61 +
   40.62 +#else
   40.63 +
   40.64 +void timer_init(struct Timer *timer, void (*fn)(unsigned long), void *data){
   40.65 +    *timer = (struct Timer){};
   40.66 +    timer->data = (unsigned long)data;
   40.67 +    timer->fn = fn;
   40.68 +}
   40.69 +
   40.70 +void timer_set(struct Timer *timer, unsigned long ttl){
   40.71 +    double now = time_now();
   40.72 +    timer->expiry = now + (double)ttl/(double)HZ;
   40.73 +    Timer_cancel(timer);
   40.74 +    Timer_add(timer);
   40.75 +}
   40.76 +
   40.77 +#endif
    41.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.2 +++ b/tools/vnet/vnet-module/timer_util.h	Thu Feb 09 16:12:11 2006 +0100
    41.3 @@ -0,0 +1,41 @@
    41.4 +/*
    41.5 + * Copyright (C) 2005 Mike Wray <mike.wray@hp.com>
    41.6 + *
    41.7 + * This program is free software; you can redistribute it and/or modify
    41.8 + * it under the terms of the GNU General Public License as published by the 
    41.9 + * Free Software Foundation; either version 2 of the License, or (at your
   41.10 + * option) any later version.
   41.11 + * 
   41.12 + * This program is distributed in the hope that it will be useful, but
   41.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   41.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   41.15 + * for more details.
   41.16 + *
   41.17 + * You should have received a copy of the GNU General Public License along
   41.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   41.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   41.20 + *
   41.21 + */
   41.22 +
   41.23 +#ifndef _VNET_TIMER_UTIL_H_
   41.24 +#define _VNET_TIMER_UTIL_H_
   41.25 +
   41.26 +#ifdef __KERNEL__
   41.27 +
   41.28 +struct timer_list;
   41.29 +#define timer_cancel del_timer
   41.30 +
   41.31 +#else /* __KERNEL__ */
   41.32 +
   41.33 +#include "timer.h"
   41.34 +#define timer_list   Timer
   41.35 +#define HZ           1000
   41.36 +#define jiffies      (unsigned long)(time_now() * HZ)
   41.37 +#define timer_cancel Timer_cancel
   41.38 +
   41.39 +#endif /* __KERNEL__ */
   41.40 +
   41.41 +void timer_init(struct timer_list *timer, void (*fn)(unsigned long), void *data);
   41.42 +void timer_set(struct timer_list *timer, unsigned long ttl);
   41.43 +
   41.44 +#endif /*! _VNET_TIMER_UTIL_H_ */
    42.1 --- a/tools/vnet/vnet-module/tunnel.c	Thu Feb 09 16:09:00 2006 +0100
    42.2 +++ b/tools/vnet/vnet-module/tunnel.c	Thu Feb 09 16:12:11 2006 +0100
    42.3 @@ -1,5 +1,5 @@
    42.4  /*
    42.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    42.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    42.7   *
    42.8   * This program is free software; you can redistribute it and/or modify
    42.9   * it under the terms of the GNU General Public License as published by the 
   42.10 @@ -16,19 +16,21 @@
   42.11   * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   42.12   *
   42.13   */
   42.14 +#ifdef __KERNEL__
   42.15 +
   42.16  #include <linux/config.h>
   42.17  #include <linux/module.h>
   42.18  #include <linux/init.h>
   42.19 +#include <linux/skbuff.h>
   42.20 +#include <linux/spinlock.h>
   42.21  
   42.22 -#include <linux/net.h>
   42.23 -#include <linux/in.h>
   42.24 -#include <linux/inet.h>
   42.25 -#include <linux/netdevice.h>
   42.26 +#else
   42.27  
   42.28 -#include <net/ip.h>
   42.29 -#include <net/protocol.h>
   42.30 -#include <net/route.h>
   42.31 -#include <linux/skbuff.h>
   42.32 +#include "sys_kernel.h"
   42.33 +#include "spinlock.h"
   42.34 +#include "skbuff.h"
   42.35 +
   42.36 +#endif
   42.37  
   42.38  #include <tunnel.h>
   42.39  #include <vnet.h>
   42.40 @@ -40,9 +42,18 @@
   42.41  #undef DEBUG
   42.42  #include "debug.h"
   42.43  
   42.44 +/** Table of tunnels, indexed by vnet and addr. */
   42.45 +HashTable *tunnel_table = NULL;
   42.46 +rwlock_t tunnel_table_lock = RW_LOCK_UNLOCKED;
   42.47 +
   42.48 +#define tunnel_read_lock(flags)    read_lock_irqsave(&tunnel_table_lock, (flags))
   42.49 +#define tunnel_read_unlock(flags)  read_unlock_irqrestore(&tunnel_table_lock, (flags))
   42.50 +#define tunnel_write_lock(flags)   write_lock_irqsave(&tunnel_table_lock, (flags))
   42.51 +#define tunnel_write_unlock(flags) write_unlock_irqrestore(&tunnel_table_lock, (flags))
   42.52 +
   42.53  void Tunnel_print(Tunnel *tunnel){
   42.54      if(tunnel){
   42.55 -        printk("Tunnel<%p base=%p ref=%02d type=%s>\n",
   42.56 +        iprintf("Tunnel<%p base=%p ref=%02d type=%s>\n",
   42.57                 tunnel,
   42.58                 tunnel->base,
   42.59                 atomic_read(&tunnel->refcount),
   42.60 @@ -51,12 +62,13 @@ void Tunnel_print(Tunnel *tunnel){
   42.61              Tunnel_print(tunnel->base);
   42.62          }
   42.63      } else {
   42.64 -        printk("Tunnel<%p base=%p ref=%02d type=%s>\n",
   42.65 +        iprintf("Tunnel<%p base=%p ref=%02d type=%s>\n",
   42.66                 NULL, NULL, 0, "ip");
   42.67      }
   42.68  }
   42.69  
   42.70 -int Tunnel_create(TunnelType *type, VnetId *vnet, VarpAddr *addr, Tunnel *base, Tunnel **val){
   42.71 +int Tunnel_create(TunnelType *type, VnetId *vnet, VarpAddr *addr,
   42.72 +                  Tunnel *base, Tunnel **val){
   42.73      int err = 0;
   42.74      Tunnel *tunnel = NULL;
   42.75      if(!type || !type->open || !type->send || !type->close){
   42.76 @@ -87,22 +99,6 @@ int Tunnel_create(TunnelType *type, Vnet
   42.77      return err;
   42.78  }
   42.79  
   42.80 -int Tunnel_open(TunnelType *type, VnetId *vnet, VarpAddr *addr, Tunnel *base, Tunnel **tunnel){
   42.81 -    int err = 0;
   42.82 -
   42.83 -    dprintf(">\n");
   42.84 -    err = Tunnel_create(type, vnet, addr, base, tunnel);
   42.85 -    if(err) goto exit;
   42.86 -    err = Tunnel_add(*tunnel);
   42.87 -  exit:
   42.88 -    if(err){
   42.89 -        Tunnel_decref(*tunnel);
   42.90 -        *tunnel = NULL;
   42.91 -    }
   42.92 -    dprintf("< err=%d\n", err);
   42.93 -    return err;
   42.94 -}
   42.95 -
   42.96  void TunnelStats_update(TunnelStats *stats, int len, int err){
   42.97      dprintf(">len=%d  err=%d\n", len, err);
   42.98      if(err){
   42.99 @@ -115,29 +111,18 @@ void TunnelStats_update(TunnelStats *sta
  42.100      dprintf("<\n");
  42.101  }
  42.102  
  42.103 -/** Table of tunnels, indexed by vnet and addr. */
  42.104 -HashTable *tunnel_table = NULL;
  42.105 -
  42.106  static inline Hashcode tunnel_table_key_hash_fn(void *k){
  42.107 -    TunnelKey *key = k;
  42.108 -    Hashcode h = 0;
  42.109 -    h = VnetId_hash(h, &key->vnet);
  42.110 -    h = VarpAddr_hash(h, &key->addr);
  42.111 -    return h;
  42.112 +    return hash_hvoid(0, k, sizeof(TunnelKey));
  42.113  }
  42.114  
  42.115  static int tunnel_table_key_equal_fn(void *k1, void *k2){
  42.116 -    TunnelKey *key1 = k1;
  42.117 -    TunnelKey *key2 = k2;
  42.118 -    return VnetId_eq(&key1->vnet, &key2->vnet) &&
  42.119 -           VarpAddr_eq(&key1->addr, &key2->addr);
  42.120 +    return memcmp(k1, k2, sizeof(TunnelKey)) == 0;
  42.121  }
  42.122  
  42.123  static void tunnel_table_entry_free_fn(HashTable *table, HTEntry *entry){
  42.124      Tunnel *tunnel;
  42.125      if(!entry) return;
  42.126      tunnel = entry->value;
  42.127 -    //dprintf(">\n"); Tunnel_print(tunnel);
  42.128      Tunnel_decref(tunnel);
  42.129      HTEntry_free(entry);
  42.130  }
  42.131 @@ -159,35 +144,86 @@ int Tunnel_init(void){
  42.132  }
  42.133      
  42.134  /** Lookup tunnel state by vnet and destination.
  42.135 + * The caller must drop the tunnel reference when done.
  42.136   *
  42.137   * @param vnet vnet
  42.138   * @param addr destination address
  42.139 - * @return tunnel state or NULL
  42.140 + * @return 0 on success
  42.141   */
  42.142 -Tunnel * Tunnel_lookup(VnetId *vnet, VarpAddr *addr){
  42.143 +int Tunnel_lookup(VnetId *vnet, VarpAddr *addr, Tunnel **tunnel){
  42.144 +    unsigned long flags;
  42.145 +    TunnelKey key = { .vnet = *vnet, .addr = *addr };
  42.146 +    dprintf(">\n");
  42.147 +    tunnel_read_lock(flags);
  42.148 +    *tunnel = HashTable_get(tunnel_table, &key);
  42.149 +    Tunnel_incref(*tunnel); /* Take the caller's reference while still locked, so a concurrent Tunnel_del() cannot free it first. */
  42.150 +    tunnel_read_unlock(flags);
  42.151 +    dprintf("< tunnel=%p\n", *tunnel);
  42.152 +    return (*tunnel ? 0 : -ENOENT); /* *tunnel is NULL when no entry matches the key. */
  42.153 +}
  42.154 +
  42.155 +/** Get a tunnel to a given vnet and destination, creating
  42.156 + * a tunnel if necessary.
  42.157 + * The caller must drop the tunnel reference when done.
  42.158 + *
  42.159 + * @param vnet vnet
  42.160 + * @param addr destination address
  42.161 + * @param ctor tunnel constructor
  42.162 + * @param ptunnel return parameter for the tunnel
  42.163 + * @return 0 on success
  42.164 + */
  42.165 +int Tunnel_open(VnetId *vnet, VarpAddr *addr,
  42.166 +                int (*ctor)(VnetId *vnet, VarpAddr *addr, Tunnel **ptunnel),
  42.167 +                Tunnel **ptunnel){
  42.168 +    int err = 0;
  42.169      Tunnel *tunnel = NULL;
  42.170 -    TunnelKey key = {.vnet = *vnet, .addr = *addr };
  42.171 -    dprintf(">\n");
  42.172 +    unsigned long flags;
  42.173 +    TunnelKey key = { .vnet = *vnet, .addr = *addr };
  42.174 +
  42.175 +    tunnel_write_lock(flags);
  42.176      tunnel = HashTable_get(tunnel_table, &key);
  42.177 -    Tunnel_incref(tunnel);
  42.178 -    dprintf("< tunnel=%p\n", tunnel);
  42.179 -    return tunnel;
  42.180 +    if(!tunnel){
  42.181 +        err = ctor(vnet, addr, &tunnel);
  42.182 +        if(err) goto exit;
  42.183 +        if(!HashTable_add(tunnel_table, tunnel, tunnel)){
  42.184 +            err = -ENOMEM;
  42.185 +            goto exit;
  42.186 +        }
  42.187 +    }
  42.188 +  exit:
  42.189 +    tunnel_write_unlock(flags);
  42.190 +    if(err){
  42.191 +        Tunnel_decref(tunnel);
  42.192 +        *ptunnel = NULL;
  42.193 +    } else {
  42.194 +        Tunnel_incref(tunnel);
  42.195 +        *ptunnel = tunnel;
  42.196 +    }
  42.197 +    return err;
  42.198  }
  42.199  
  42.200  int Tunnel_add(Tunnel *tunnel){
  42.201      int err = 0;
  42.202 +    unsigned long flags;
  42.203      dprintf(">\n");
  42.204 +    tunnel_write_lock(flags);
  42.205      if(HashTable_add(tunnel_table, tunnel, tunnel)){
  42.206          Tunnel_incref(tunnel);   
  42.207      } else {
  42.208          err = -ENOMEM;
  42.209      }
  42.210 +    tunnel_write_unlock(flags);
  42.211      dprintf("< err=%d\n", err);
  42.212      return err;
  42.213  }
  42.214  
  42.215  int Tunnel_del(Tunnel *tunnel){
  42.216 -    return HashTable_remove(tunnel_table, tunnel);
  42.217 +    int err;
  42.218 +    unsigned long flags;
  42.219 +    tunnel_write_lock(flags);
  42.220 +    err = HashTable_remove(tunnel_table, tunnel);
  42.221 +    tunnel_write_unlock(flags);
  42.222 +    return err;
  42.223  }
  42.224  
  42.225  /** Do tunnel send processing on a packet.
  42.226 @@ -217,8 +253,11 @@ int __init tunnel_module_init(void){
  42.227  }
  42.228  
  42.229  void __exit tunnel_module_exit(void){
  42.230 +    unsigned long flags;
  42.231 +    tunnel_write_lock(flags);
  42.232      if(tunnel_table){
  42.233          HashTable_free(tunnel_table);
  42.234          tunnel_table = NULL;
  42.235      }
  42.236 +    tunnel_write_unlock(flags);
  42.237  }
    43.1 --- a/tools/vnet/vnet-module/tunnel.h	Thu Feb 09 16:09:00 2006 +0100
    43.2 +++ b/tools/vnet/vnet-module/tunnel.h	Thu Feb 09 16:12:11 2006 +0100
    43.3 @@ -1,5 +1,5 @@
    43.4  /*
    43.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    43.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    43.7   *
    43.8   * This program is free software; you can redistribute it and/or modify
    43.9   * it under the terms of the GNU General Public License as published by the 
   43.10 @@ -19,9 +19,18 @@
   43.11  #ifndef __VNET_TUNNEL_H__
   43.12  #define __VNET_TUNNEL_H__
   43.13  
   43.14 +#ifdef __KERNEL__
   43.15  #include <linux/types.h>
   43.16 -#include <linux/slab.h>
   43.17  #include <asm/atomic.h>
   43.18 +
   43.19 +#else
   43.20 +
   43.21 +//#include <linux/types.h>
   43.22 +#include "sys_kernel.h"
   43.23 +#include "spinlock.h"
   43.24 +
   43.25 +#endif
   43.26 +
   43.27  #include <if_varp.h>
   43.28  
   43.29  struct sk_buff;
   43.30 @@ -42,8 +51,8 @@ typedef struct TunnelStats {
   43.31  } TunnelStats;
   43.32  
   43.33  typedef struct TunnelKey {
   43.34 -    VnetId vnet;
   43.35 -    VarpAddr addr;
   43.36 +    struct VnetId vnet;
   43.37 +    struct VarpAddr addr;
   43.38  } TunnelKey;
   43.39  
   43.40  typedef struct Tunnel {
   43.41 @@ -61,17 +70,13 @@ typedef struct Tunnel {
   43.42      struct Tunnel *base;
   43.43  } Tunnel;
   43.44  
   43.45 -extern void Tunnel_print(Tunnel *tunnel);
   43.46 -
   43.47  /** Decrement the reference count, freeing if zero.
   43.48   *
   43.49   * @param tunnel tunnel (may be null)
   43.50   */
   43.51 -static inline void Tunnel_decref(Tunnel *tunnel){
   43.52 +static inline void Tunnel_decref(struct Tunnel *tunnel){
   43.53      if(!tunnel) return;
   43.54      if(atomic_dec_and_test(&tunnel->refcount)){
   43.55 -        printk("%s> Closing tunnel:\n", __FUNCTION__);
   43.56 -        Tunnel_print(tunnel);
   43.57          tunnel->type->close(tunnel);
   43.58          Tunnel_decref(tunnel->base);
   43.59          kfree(tunnel);
   43.60 @@ -88,15 +93,19 @@ static inline void Tunnel_incref(Tunnel 
   43.61  }
   43.62  
   43.63  extern int Tunnel_init(void);
   43.64 -extern Tunnel * Tunnel_lookup(struct VnetId *vnet, struct VarpAddr *addr);
   43.65 -extern int Tunnel_add(Tunnel *tunnel);
   43.66 -extern int Tunnel_del(Tunnel *tunnel);
   43.67 -extern int Tunnel_send(Tunnel *tunnel, struct sk_buff *skb);
   43.68 +extern int Tunnel_lookup(struct VnetId *vnet, struct VarpAddr *addr, struct Tunnel **tunnel);
   43.69 +extern int Tunnel_open(struct VnetId *vnet, struct VarpAddr *addr,
   43.70 +                       int (*ctor)(struct VnetId *vnet,
   43.71 +                                   struct VarpAddr *addr,
   43.72 +                                   struct Tunnel **ptunnel),
   43.73 +                       struct Tunnel **ptunnel);
   43.74 +extern int Tunnel_add(struct Tunnel *tunnel);
   43.75 +extern int Tunnel_del(struct Tunnel *tunnel);
   43.76 +extern void Tunnel_print(struct Tunnel *tunnel);
   43.77 +extern int Tunnel_send(struct Tunnel *tunnel, struct sk_buff *skb);
   43.78  
   43.79 -extern int Tunnel_create(TunnelType *type, struct VnetId *vnet, struct VarpAddr *addr,
   43.80 -                         Tunnel *base, Tunnel **tunnelp);
   43.81 -extern int Tunnel_open(TunnelType *type, struct VnetId *vnet, struct VarpAddr *addr,
   43.82 -                       Tunnel *base, Tunnel **tunnelp);
   43.83 +extern int Tunnel_create(struct TunnelType *type, struct VnetId *vnet, struct VarpAddr *addr,
   43.84 +                         struct Tunnel *base, struct Tunnel **tunnelp);
   43.85  
   43.86  extern int tunnel_module_init(void);
   43.87  extern void tunnel_module_exit(void);
    44.1 --- a/tools/vnet/vnet-module/varp.c	Thu Feb 09 16:09:00 2006 +0100
    44.2 +++ b/tools/vnet/vnet-module/varp.c	Thu Feb 09 16:12:11 2006 +0100
    44.3 @@ -1,5 +1,5 @@
    44.4  /*
    44.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    44.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    44.7   *
    44.8   * This program is free software; you can redistribute it and/or modify
    44.9   * it under the terms of the GNU General Public License as published by the 
   44.10 @@ -17,6 +17,7 @@
   44.11   *
   44.12   */
   44.13  
   44.14 +#ifdef __KERNEL__
   44.15  #include <linux/config.h>
   44.16  #include <linux/kernel.h>
   44.17  #include <linux/module.h>
   44.18 @@ -28,6 +29,7 @@
   44.19  #include <linux/in.h>
   44.20  #include <linux/inet.h>
   44.21  #include <linux/netdevice.h>
   44.22 +#include <linux/inetdevice.h>
   44.23  #include <linux/udp.h>
   44.24  
   44.25  #include <net/ip.h>
   44.26 @@ -37,18 +39,35 @@
   44.27  #include <linux/spinlock.h>
   44.28  #include <asm/semaphore.h>
   44.29  
   44.30 +#else
   44.31 +
   44.32 +#include "sys_kernel.h"
   44.33 +#include <netinet/in.h>
   44.34 +#include <arpa/inet.h>
   44.35 +#include <linux/ip.h>
   44.36 +#include <linux/udp.h>
   44.37 +#include "spinlock.h"
   44.38 +#include "skbuff.h"
   44.39 +
   44.40 +#endif
   44.41 +
   44.42  #include <tunnel.h>
   44.43  #include <vnet.h>
   44.44  #include <vif.h>
   44.45  #include <if_varp.h>
   44.46  #include <varp.h>
   44.47 +#include <varp_util.h>
   44.48  #include <vnet.h>
   44.49 +#include <etherip.h>
   44.50 +#include <vnet_forward.h>
   44.51  
   44.52  #include "allocate.h"
   44.53 +#include "iostream.h"
   44.54  #include "hash_table.h"
   44.55  #include "sys_net.h"
   44.56  #include "sys_string.h"
   44.57  #include "skb_util.h"
   44.58 +#include "timer_util.h"
   44.59  
   44.60  #define MODULE_NAME "VARP"
   44.61  #define DEBUG 1
   44.62 @@ -104,7 +123,7 @@ on probe timer:
   44.63  enum {
   44.64      VARP_STATE_INCOMPLETE = 1,
   44.65      VARP_STATE_REACHABLE = 2,
   44.66 -    VARP_STATE_FAILED = 3
   44.67 +    VARP_STATE_FAILED = 3,
   44.68  };
   44.69  
   44.70  /** Varp entry flags. */
   44.71 @@ -137,6 +156,8 @@ typedef struct VarpEntry {
   44.72      atomic_t refcount;
   44.73      /** Lock. */
   44.74      rwlock_t lock;
   44.75 +    unsigned long lflags;
   44.76 +
   44.77      /** How many probes have been made. */
   44.78      atomic_t probes;
   44.79      /** Probe timer. */
   44.80 @@ -146,6 +167,7 @@ typedef struct VarpEntry {
   44.81      struct sk_buff_head queue;
   44.82      /** Maximum size of the queue. */
   44.83      int queue_max;
   44.84 +    atomic_t deleted;
   44.85  } VarpEntry;
   44.86  
   44.87  /** The varp cache. Varp entries indexed by VarpKey. */
   44.88 @@ -156,11 +178,13 @@ typedef struct VarpTable {
   44.89      /** Sweep timer. */
   44.90      struct timer_list timer;
   44.91  
   44.92 -    /** Lock. Need to use a semaphore instead of a spinlock because
   44.93 -     * some operations under the varp table lock can schedule - and
   44.94 -     * you mustn't hold a spinlock when scheduling.
   44.95 -     */
   44.96 -    struct semaphore lock;
   44.97 +    rwlock_t lock;
   44.98 +    struct semaphore mutex;
   44.99 +
  44.100 +    int entry_ttl;
  44.101 +    int probe_max;
  44.102 +    int probe_interval;
  44.103 +    int queue_max;
  44.104  
  44.105  } VarpTable;
  44.106  
  44.107 @@ -176,19 +200,30 @@ u32 varp_mcast_addr = 0;
  44.108  /** UDP port (network order). */
  44.109  u16 varp_port = 0;
  44.110  
  44.111 -char *varp_device = "xenbr0";
  44.112 +char *varp_device = "xen-br0";
  44.113  
  44.114 -#define VarpTable_read_lock(z, flags)    do{ (flags) = 0; down(&(z)->lock); } while(0)
  44.115 -#define VarpTable_read_unlock(z, flags)  do{ (flags) = 0; up(&(z)->lock); } while(0)
  44.116 -#define VarpTable_write_lock(z, flags)   do{ (flags) = 0; down(&(z)->lock); } while(0)
  44.117 -#define VarpTable_write_unlock(z, flags) do{ (flags) = 0; up(&(z)->lock); } while(0)
  44.118 +#define VarpTable_read_lock(vtable, flags)    \
  44.119 +  do{ read_lock_irqsave(&(vtable)->lock, (flags)); } while(0)
  44.120  
  44.121 -#define VarpEntry_lock(ventry, flags)    write_lock_irqsave(&(ventry)->lock, (flags))
  44.122 -#define VarpEntry_unlock(ventry, flags)  write_unlock_irqrestore(&(ventry)->lock, (flags))
  44.123 +#define VarpTable_read_unlock(vtable, flags)  \
  44.124 +  do{ read_unlock_irqrestore(&(vtable)->lock, (flags)); } while(0)
  44.125  
  44.126 -void VarpTable_sweep(VarpTable *z, int all);
  44.127 -void VarpTable_flush(VarpTable *z);
  44.128 -void VarpTable_print(VarpTable *z);
  44.129 +#define VarpTable_write_lock(vtable, flags)    \
  44.130 +  do{ write_lock_irqsave(&(vtable)->lock, (flags)); } while(0)
  44.131 +
  44.132 +#define VarpTable_write_unlock(vtable, flags)  \
  44.133 +  do{ write_unlock_irqrestore(&(vtable)->lock, (flags)); } while(0)
  44.134 +
  44.135 +#define VarpEntry_lock(ventry, flags)    \
  44.136 +  do{ write_lock_irqsave(&(ventry)->lock, (flags)); (ventry)->lflags = (flags); } while(0)
  44.137 +
  44.138 +#define VarpEntry_unlock(ventry, flags)  \
  44.139 +  do{ (flags) = (ventry)->lflags; write_unlock_irqrestore(&(ventry)->lock, (flags)); } while(0)
  44.140 +
  44.141 +void VarpTable_sweep(VarpTable *vtable);
  44.142 +void VarpTable_flush(VarpTable *vtable);
  44.143 +void VarpTable_print(VarpTable *vtable, IOStream *io);
  44.144 +int VarpEntry_output(VarpEntry *ventry, struct sk_buff *skb);
  44.145  
  44.146  #include "./varp_util.c"
  44.147  
  44.148 @@ -196,7 +231,7 @@ void VarpTable_print(VarpTable *z);
  44.149   */
  44.150  void varp_dprint(void){
  44.151  #ifdef DEBUG
  44.152 -    VarpTable_print(varp_table);
  44.153 +    VarpTable_print(varp_table, iostdout);
  44.154  #endif
  44.155  } 
  44.156  
  44.157 @@ -206,6 +241,7 @@ void varp_flush(void){
  44.158      VarpTable_flush(varp_table);
  44.159  }
  44.160  
  44.161 +#ifdef __KERNEL__
  44.162  static int device_ucast_addr(const char *device, uint32_t *addr)
  44.163  {
  44.164      int err;
  44.165 @@ -234,23 +270,6 @@ int varp_ucast_addr(uint32_t *addr)
  44.166      return err;
  44.167  }
  44.168  
  44.169 -/** Print varp info and the varp cache.
  44.170 - */
  44.171 -void varp_print(void){
  44.172 -    uint32_t addr = 0;
  44.173 -    varp_ucast_addr(&addr);
  44.174 -
  44.175 -    printk(KERN_INFO "=== VARP ===============================================================\n");
  44.176 -    printk(KERN_INFO "varp_device     %s\n", varp_device);
  44.177 -    printk(KERN_INFO "varp_mcast_addr " IPFMT "\n", NIPQUAD(varp_mcast_addr));
  44.178 -    printk(KERN_INFO "varp_ucast_addr " IPFMT "\n", NIPQUAD(addr));
  44.179 -    printk(KERN_INFO "varp_port       %d\n", ntohs(varp_port));
  44.180 -    vnet_print();
  44.181 -    vif_print();
  44.182 -    VarpTable_print(varp_table);
  44.183 -    printk(KERN_INFO "========================================================================\n");
  44.184 -}
  44.185 -
  44.186  /** Lookup a network device by name.
  44.187   *
  44.188   * @param name device name
  44.189 @@ -287,6 +306,35 @@ int vnet_get_device_address(struct net_d
  44.190      return err;
  44.191  }
  44.192  
  44.193 +#else
  44.194 +
  44.195 +int varp_ucast_addr(uint32_t *addr)
  44.196 +{
  44.197 +    return 0;
  44.198 +}
  44.199 +
  44.200 +#endif
  44.201 +
  44.202 +/** Print varp info and the varp cache.
  44.203 + */
  44.204 +void varp_print(IOStream *io){
  44.205 +    uint32_t addr = 0;
  44.206 +    varp_ucast_addr(&addr);
  44.207 +
  44.208 +    IOStream_print(io, "(varp \n");
  44.209 +    IOStream_print(io, " (device %s)\n", varp_device);
  44.210 +    IOStream_print(io, " (mcast_addr " IPFMT ")\n", NIPQUAD(varp_mcast_addr));
  44.211 +    IOStream_print(io, " (ucast_addr " IPFMT ")\n", NIPQUAD(addr));
  44.212 +    IOStream_print(io, " (port %d)\n", ntohs(varp_port));
  44.213 +    IOStream_print(io, " (encapsulation %s)\n",
  44.214 +                   (etherip_in_udp ? "etherip_in_udp" : "etherip"));
  44.215 +    IOStream_print(io, " (entry_ttl %lu)\n", varp_table->entry_ttl);
  44.216 +    IOStream_print(io, ")\n");
  44.217 +    VarpTable_print(varp_table, io);
  44.218 +}
  44.219 +
  44.220 +#ifdef __KERNEL__
  44.221 +
  44.222  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  44.223  
  44.224  static inline int addr_route(u32 daddr, struct rtable **prt){
  44.225 @@ -312,13 +360,20 @@ static inline int addr_route(u32 daddr, 
  44.226      return err;
  44.227  }
  44.228  
  44.229 -#endif
  44.230 +#endif // LINUX_VERSION_CODE
  44.231  
  44.232  #ifndef LL_RESERVED_SPACE
  44.233  #define HH_DATA_MOD	16
  44.234  #define LL_RESERVED_SPACE(dev) \
  44.235          ((dev->hard_header_len & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
  44.236 -#endif
  44.237 +
  44.238 +#endif // LL_RESERVED_SPACE
  44.239 +
  44.240 +#else // __KERNEL__
  44.241 +
  44.242 +#define ip_eth_mc_map(daddr, dmac) do{ }while(0)
  44.243 +
  44.244 +#endif // __KERNEL__
  44.245  
  44.246  /** Send a varp protocol message.
  44.247   *
  44.248 @@ -337,12 +392,11 @@ int varp_send(u16 opcode, struct net_dev
  44.249      int udp_n = sizeof(struct udphdr);
  44.250      int varp_n = sizeof(VarpHdr);
  44.251      struct sk_buff *skbout = NULL;
  44.252 -    struct in_device *in_dev = NULL;
  44.253      VarpHdr *varph = NULL;
  44.254 -    u8 macbuf[6] = {};
  44.255 -    u8 *smac, *dmac = macbuf;
  44.256 -    u32 saddr, daddr;
  44.257 -    u16 sport, dport;
  44.258 +    u8 smacbuf[6] = {}, dmacbuf[6] = {};
  44.259 +    u8 *smac = smacbuf, *dmac = dmacbuf;
  44.260 +    u32 saddr = 0, daddr = 0;
  44.261 +    u16 sport = 0, dport = 0;
  44.262  #if defined(DEBUG)
  44.263      char vnetbuf[VNET_ID_BUF];
  44.264  #endif
  44.265 @@ -365,28 +419,38 @@ int varp_send(u16 opcode, struct net_dev
  44.266          sport = varp_port;
  44.267      }
  44.268  
  44.269 -    if(!dev){
  44.270 -        struct rtable *rt = NULL;
  44.271 -        err = addr_route(daddr, &rt);
  44.272 -        if(err) goto exit;
  44.273 -        dev = rt->u.dst.dev;
  44.274 +#ifdef __KERNEL__
  44.275 +    {
  44.276 +        struct in_device *in_dev = NULL;
  44.277 +        if(!dev){
  44.278 +            struct rtable *rt = NULL;
  44.279 +            err = addr_route(daddr, &rt);
  44.280 +            if(err) goto exit;
  44.281 +            dev = rt->u.dst.dev;
  44.282 +        }
  44.283 +        
  44.284 +        in_dev = in_dev_get(dev);
  44.285 +        if(!in_dev){
  44.286 +            err = -ENODEV;
  44.287 +            goto exit;
  44.288 +        }
  44.289 +        link_n = LL_RESERVED_SPACE(dev);
  44.290 +        saddr = in_dev->ifa_list->ifa_address;
  44.291 +        smac = dev->dev_addr;
  44.292 +        if(daddr == INADDR_BROADCAST){
  44.293 +            daddr = in_dev->ifa_list->ifa_broadcast;
  44.294 +            dmac = dev->broadcast;
  44.295 +        }
  44.296 +        in_dev_put(in_dev);
  44.297      }
  44.298 +#else
  44.299 +    {
  44.300 +        extern uint32_t vnetd_addr(void); 
  44.301 +        saddr = vnetd_addr();
  44.302 +    }
  44.303 +#endif // __KERNEL__
  44.304  
  44.305 -    in_dev = in_dev_get(dev);
  44.306 -    if(!in_dev){
  44.307 -        err = -ENODEV;
  44.308 -        goto exit;
  44.309 -    }
  44.310 -    link_n = LL_RESERVED_SPACE(dev);
  44.311 -    saddr = in_dev->ifa_list->ifa_address;
  44.312 -    smac = dev->dev_addr;
  44.313 -    if(daddr == INADDR_BROADCAST){
  44.314 -        daddr = in_dev->ifa_list->ifa_broadcast;
  44.315 -        dmac = dev->broadcast;
  44.316 -    }
  44.317 -    in_dev_put(in_dev);
  44.318 -
  44.319 -    dprintf("> dev=%s\n", dev->name);
  44.320 +    dprintf("> dev=%s\n", (dev ? dev->name : "<none>"));
  44.321      dprintf("> smac=" MACFMT " dmac=" MACFMT "\n", MAC6TUPLE(smac), MAC6TUPLE(dmac));
  44.322      dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n", NIPQUAD(saddr), NIPQUAD(daddr));
  44.323      dprintf("> sport=%u dport=%u\n", ntohs(sport), ntohs(dport));
  44.324 @@ -400,12 +464,16 @@ int varp_send(u16 opcode, struct net_dev
  44.325      skb_reserve(skbout, link_n);
  44.326      skbout->protocol = htons(ETH_P_IP);
  44.327  
  44.328 +#ifdef __KERNEL__
  44.329      // Device header. Pushes device header on front of skb.
  44.330      if (dev->hard_header){
  44.331          err = dev->hard_header(skbout, dev, ETH_P_IP, dmac, smac, skbout->len);
  44.332          if(err < 0) goto exit;
  44.333          skbout->mac.raw = skbout->data;
  44.334      }
  44.335 +#else
  44.336 +    smac = smac; // Defeat unused variable warning.
  44.337 +#endif // __KERNEL__
  44.338  
  44.339      // IP header.
  44.340      skbout->nh.raw = skb_put(skbout, ip_n);
  44.341 @@ -446,104 +514,102 @@ int varp_send(u16 opcode, struct net_dev
  44.342      return err;
  44.343  }
  44.344  
  44.345 +
  44.346  /** Send a varp request for the vnet and destination mac of a packet.
  44.347 + * Assumes the ventry is locked.
  44.348   *
  44.349   * @param skb packet
  44.350   * @param vnet vnet (in network order)
  44.351   * @return 0 on success, error code otherwise
  44.352   */
  44.353 -int varp_solicit(struct sk_buff *skb, VnetId *vnet){
  44.354 -    int err = 0;
  44.355 -    err = varp_send(VARP_OP_REQUEST, NULL, NULL,
  44.356 -                    vnet, (Vmac*)eth_hdr(skb)->h_dest);
  44.357 -    return err;
  44.358 +int varp_solicit(VnetId *vnet, Vmac *vmac){
  44.359 +    return varp_send(VARP_OP_REQUEST, NULL, NULL, vnet, vmac);
  44.360  }
  44.361  
  44.362  /* Test some flags.
  44.363   *
  44.364 - * @param z varp entry
  44.365 + * @param ventry varp entry
  44.366   * @param flags to test
  44.367   * @return nonzero if flags set
  44.368   */
  44.369 -int VarpEntry_get_flags(VarpEntry *z, int flags){
  44.370 -    return z->flags & flags;
  44.371 +int VarpEntry_get_flags(VarpEntry *ventry, int flags){
  44.372 +    return ventry->flags & flags;
  44.373  }
  44.374  
  44.375  /** Set some flags.
  44.376   *
  44.377 - * @param z varp entry
  44.378 + * @param ventry varp entry
  44.379   * @param flags to set
  44.380   * @param set set flags on if nonzero, off if zero
  44.381   * @return new flags value
  44.382   */
  44.383 -int VarpEntry_set_flags(VarpEntry *z, int flags, int set){
  44.384 +int VarpEntry_set_flags(VarpEntry *ventry, int flags, int set){
  44.385      if(set){
  44.386 -        z->flags |= flags;
  44.387 +        ventry->flags |= flags;
  44.388      } else {
  44.389 -        z->flags &= ~flags;
  44.390 +        ventry->flags &= ~flags;
  44.391      }
  44.392 -    return z->flags;
  44.393 +    return ventry->flags;
  44.394  }
  44.395  
  44.396  /** Print a varp entry.
  44.397   *
  44.398   * @param ventry varp entry
  44.399   */
  44.400 -void VarpEntry_print(VarpEntry *ventry){
  44.401 +void VarpEntry_print(VarpEntry *ventry, IOStream *io){
  44.402 +    IOStream_print(io, "(ventry \n");
  44.403      if(ventry){
  44.404 +        unsigned long now = jiffies;
  44.405          char *state, *flags;
  44.406          char vnetbuf[VNET_ID_BUF];
  44.407          char addrbuf[VARP_ADDR_BUF];
  44.408  
  44.409          switch(ventry->state){
  44.410 -        case VARP_STATE_INCOMPLETE: state = "INC"; break;
  44.411 -        case VARP_STATE_REACHABLE:  state = "RCH"; break;
  44.412 -        case VARP_STATE_FAILED:     state = "FLD"; break;
  44.413 -        default:                    state = "UNK"; break;
  44.414 +        case VARP_STATE_INCOMPLETE: state = "incomplete"; break;
  44.415 +        case VARP_STATE_REACHABLE:  state = "reachable"; break;
  44.416 +        case VARP_STATE_FAILED:     state = "failed"; break;
  44.417 +        default:                    state = "unknown"; break;
  44.418          }
  44.419 -        flags = (VarpEntry_get_flags(ventry, VARP_FLAG_PROBING) ? "P" : " ");
  44.420 +        flags = (VarpEntry_get_flags(ventry, VARP_FLAG_PROBING) ? "P" : "-");
  44.421  
  44.422 -        printk(KERN_INFO "VENTRY(%p ref=%1d %s %s vnet=%s vmac=" MACFMT
  44.423 -               " addr=%s q=%3d t=%lu)\n",
  44.424 -               ventry,
  44.425 -               atomic_read(&ventry->refcount),
  44.426 -               state, flags,
  44.427 -               VnetId_ntoa(&ventry->key.vnet, vnetbuf),
  44.428 -               MAC6TUPLE(ventry->key.vmac.mac),
  44.429 -               VarpAddr_ntoa(&ventry->addr, addrbuf),
  44.430 -               skb_queue_len(&ventry->queue),
  44.431 -               ventry->timestamp);
  44.432 -    } else {
  44.433 -        printk("VENTRY: Null!\n");
  44.434 +        IOStream_print(io, " (ref %d)\n", atomic_read(&ventry->refcount));
  44.435 +        IOStream_print(io, " (state %s)\n", state);
  44.436 +        IOStream_print(io, " (flags %s)\n", flags);
  44.437 +        IOStream_print(io, " (addr %s)\n", VarpAddr_ntoa(&ventry->addr, addrbuf));
  44.438 +        IOStream_print(io, " (queue %d)\n", skb_queue_len(&ventry->queue));
  44.439 +        IOStream_print(io, " (age %lu)\n", now - ventry->timestamp);
  44.440 +        IOStream_print(io, " (vmac " MACFMT ")\n", MAC6TUPLE(ventry->key.vmac.mac));
  44.441 +        IOStream_print(io, " (vnet %s)\n", VnetId_ntoa(&ventry->key.vnet, vnetbuf));
  44.442      }
  44.443 +    IOStream_print(io, ")\n");
  44.444  }
  44.445  
  44.446  /** Free a varp entry.
  44.447   *
  44.448 - * @param z varp entry
  44.449 + * @param ventry varp entry
  44.450   */
  44.451 -void VarpEntry_free(VarpEntry *z){
  44.452 -    if(!z) return;
  44.453 -    deallocate(z);
  44.454 +static void VarpEntry_free(VarpEntry *ventry){
  44.455 +    if(!ventry) return;
  44.456 +    deallocate(ventry);
  44.457  }
  44.458  
  44.459  /** Increment reference count.
  44.460   *
  44.461 - * @param z varp entry (may be null)
  44.462 + * @param ventry varp entry (may be null)
  44.463   */
  44.464 -void VarpEntry_incref(VarpEntry *z){
  44.465 -    if(!z) return;
  44.466 -    atomic_inc(&z->refcount);
  44.467 +void VarpEntry_incref(VarpEntry *ventry){
  44.468 +    if(!ventry) return;
  44.469 +    atomic_inc(&ventry->refcount);
  44.470  }
  44.471  
  44.472  /** Decrement reference count, freeing if zero.
  44.473   *
  44.474 - * @param z varp entry (may be null)
  44.475 + * @param ventry varp entry (may be null)
  44.476   */
  44.477 -void VarpEntry_decref(VarpEntry *z){
  44.478 -    if(!z) return;
  44.479 -    if(atomic_dec_and_test(&z->refcount)){
  44.480 -        VarpEntry_free(z);
  44.481 +void VarpEntry_decref(VarpEntry *ventry){
  44.482 +    if(!ventry) return;
  44.483 +    if(atomic_dec_and_test(&ventry->refcount)){
  44.484 +        VarpEntry_free(ventry);
  44.485      }
  44.486  }
  44.487  
  44.488 @@ -567,9 +633,7 @@ void VarpEntry_error(VarpEntry *ventry){
  44.489   * @param ventry varp entry
  44.490   */
  44.491  void VarpEntry_schedule(VarpEntry *ventry){
  44.492 -    unsigned long now = jiffies;
  44.493 -    ventry->timer.expires = now + VARP_PROBE_INTERVAL;
  44.494 -    add_timer(&ventry->timer);
  44.495 +    timer_set(&ventry->timer, VARP_PROBE_INTERVAL);
  44.496  }
  44.497  
  44.498  /** Function called when a varp entry timer goes off.
  44.499 @@ -582,36 +646,49 @@ static void varp_timer_fn(unsigned long 
  44.500      unsigned long flags;
  44.501      VarpEntry *ventry = (VarpEntry *)arg;
  44.502      struct sk_buff *skb = NULL;
  44.503 -    int locked = 0, probing = 0;
  44.504 +    int probing = 0;
  44.505  
  44.506 -    dprintf(">\n"); //VarpEntry_print(ventry);
  44.507 +    dprintf(">\n");
  44.508      VarpEntry_lock(ventry, flags);
  44.509 -    locked = 1;
  44.510 -    if(ventry->state == VARP_STATE_REACHABLE){
  44.511 -        // Do nothing.
  44.512 -    } else {
  44.513 -        // Probe if haven't run out of tries, otherwise fail.
  44.514 -        if(atomic_read(&ventry->probes) < VARP_PROBE_MAX){
  44.515 -            probing = 1;
  44.516 -            VarpEntry_schedule(ventry);
  44.517 -            skb = skb_peek(&ventry->queue);
  44.518 -            if(skb){
  44.519 -                dprintf("> skbs in queue - solicit\n");
  44.520 -                atomic_inc(&ventry->probes);
  44.521 -                VarpEntry_unlock(ventry, flags);
  44.522 -                locked = 0;
  44.523 -                varp_solicit(skb, &ventry->key.vnet);
  44.524 +    if(!atomic_read(&ventry->deleted)){
  44.525 +        switch(ventry->state){
  44.526 +        case VARP_STATE_REACHABLE:
  44.527 +        case VARP_STATE_FAILED:
  44.528 +            break;
  44.529 +        case VARP_STATE_INCOMPLETE:
  44.530 +            // Probe if haven't run out of tries, otherwise fail.
  44.531 +            if(atomic_read(&ventry->probes) < VARP_PROBE_MAX){
  44.532 +                unsigned long qflags;
  44.533 +                VnetId vnet;
  44.534 +                Vmac vmac;
  44.535 +
  44.536 +                probing = 1;
  44.537 +                spin_lock_irqsave(&ventry->queue.lock, qflags);
  44.538 +                skb = skb_peek(&ventry->queue);
  44.539 +                if(skb){
  44.540 +                    vmac = *(Vmac*)eth_hdr(skb)->h_dest;
  44.541 +                }
  44.542 +                spin_unlock_irqrestore(&ventry->queue.lock, qflags);
  44.543 +                if(skb){
  44.544 +                    dprintf("> skbs in queue - solicit\n");
  44.545 +                    vnet = ventry->key.vnet;
  44.546 +                    atomic_inc(&ventry->probes);
  44.547 +                    VarpEntry_unlock(ventry, flags);
  44.548 +                    varp_solicit(&vnet, &vmac);
  44.549 +                    VarpEntry_lock(ventry, flags);        
  44.550 +                } else {
  44.551 +                    dprintf("> empty queue.\n");
  44.552 +                }
  44.553 +                VarpEntry_schedule(ventry);
  44.554              } else {
  44.555 -                dprintf("> empty queue.\n");
  44.556 +                VarpEntry_error(ventry);
  44.557 +                ventry->state = VARP_STATE_FAILED;
  44.558              }
  44.559 -        } else {
  44.560 -            dprintf("> Out of probes: FAILED\n");
  44.561 -            VarpEntry_error(ventry);
  44.562 -            ventry->state = VARP_STATE_FAILED;
  44.563 +            break;
  44.564          }
  44.565      }
  44.566      VarpEntry_set_flags(ventry, VARP_FLAG_PROBING, probing);
  44.567 -    if(locked) VarpEntry_unlock(ventry, flags);
  44.568 +    VarpEntry_unlock(ventry, flags);
  44.569      if(!probing) VarpEntry_decref(ventry);
  44.570      dprintf("<\n");
  44.571  }
  44.572 @@ -631,25 +708,25 @@ static void varp_error_fn(VarpEntry *ven
  44.573   * @return ventry or null
  44.574   */
  44.575  VarpEntry * VarpEntry_new(VnetId *vnet, Vmac *vmac){
  44.576 -    VarpEntry *z = ALLOCATE(VarpEntry);
  44.577 -    if(z){
  44.578 +    VarpEntry *ventry = ALLOCATE(VarpEntry);
  44.579 +    if(ventry){
  44.580          unsigned long now = jiffies;
  44.581  
  44.582 -        atomic_set(&z->refcount, 1);
  44.583 -        z->lock = RW_LOCK_UNLOCKED;
  44.584 -        z->state = VARP_STATE_INCOMPLETE;
  44.585 -        z->queue_max = VARP_QUEUE_MAX;
  44.586 -        skb_queue_head_init(&z->queue);
  44.587 -        init_timer(&z->timer);
  44.588 -        z->timer.data = (unsigned long)z;
  44.589 -        z->timer.function = varp_timer_fn;
  44.590 -        z->timestamp = now;
  44.591 -        z->error = varp_error_fn;
  44.592 +        atomic_set(&ventry->refcount, 1);
  44.593 +        atomic_set(&ventry->probes, 0);
  44.594 +        atomic_set(&ventry->deleted, 0);
  44.595 +        ventry->lock = RW_LOCK_UNLOCKED;
  44.596 +        ventry->state = VARP_STATE_INCOMPLETE;
  44.597 +        ventry->queue_max = VARP_QUEUE_MAX;
  44.598 +        skb_queue_head_init(&ventry->queue);
  44.599 +        timer_init(&ventry->timer, varp_timer_fn, ventry);
  44.600 +        ventry->timestamp = now;
  44.601 +        ventry->error = varp_error_fn;
  44.602  
  44.603 -        z->key.vnet = *vnet;
  44.604 -        z->key.vmac = *vmac;
  44.605 +        ventry->key.vnet = *vnet;
  44.606 +        ventry->key.vmac = *vmac;
  44.607      }
  44.608 -    return z;
  44.609 +    return ventry;
  44.610  }
  44.611  
  44.612  /** Hash function for keys in the varp cache.
  44.613 @@ -658,12 +735,8 @@ VarpEntry * VarpEntry_new(VnetId *vnet, 
  44.614   * @param k key (VarpKey)
  44.615   * @return hashcode
  44.616   */
  44.617 -Hashcode varp_key_hash_fn(void *k){
  44.618 -    VarpKey *key = k;
  44.619 -    Hashcode h = 0;
  44.620 -    h = VnetId_hash(h, &key->vnet);
  44.621 -    h = Vmac_hash(h, &key->vmac);
  44.622 -    return h;
  44.623 +static Hashcode varp_key_hash_fn(void *k){
  44.624 +    return hash_hvoid(0, k, sizeof(VarpKey));
  44.625  }
  44.626  
  44.627  /** Test equality for keys in the varp cache.
  44.628 @@ -673,11 +746,8 @@ Hashcode varp_key_hash_fn(void *k){
  44.629   * @param k2 key to compare (VarpKey)
  44.630   * @return 1 if equal, 0 otherwise
  44.631   */
  44.632 -int varp_key_equal_fn(void *k1, void *k2){
  44.633 -    VarpKey *key1 = k1;
  44.634 -    VarpKey *key2 = k2;
  44.635 -    return (VnetId_eq(&key1->vnet, &key2->vnet) &&
  44.636 -            Vmac_eq(&key1->vmac, &key2->vmac));
  44.637 +static int varp_key_equal_fn(void *k1, void *k2){
  44.638 +    return memcmp(k1, k2, sizeof(VarpKey)) == 0;
  44.639  }
  44.640  
  44.641  /** Free an entry in the varp cache.
  44.642 @@ -696,27 +766,43 @@ static void varp_entry_free_fn(HashTable
  44.643  /** Free the whole varp cache.
  44.644   * Dangerous.
  44.645   *
  44.646 - * @param z varp cache
  44.647 + * @param vtable varp cache
  44.648   */
  44.649 -void VarpTable_free(VarpTable *z){
  44.650 -    unsigned long flags;
  44.651 -    if(!z) return;
  44.652 -    VarpTable_write_lock(z, flags);
  44.653 -    del_timer(&z->timer);
  44.654 -    z->timer.data = 0;
  44.655 -    if(z->table) HashTable_free(z->table); 
  44.656 -    VarpTable_write_unlock(z, flags);
  44.657 -    deallocate(z);
  44.658 +void VarpTable_free(VarpTable *vtable){
  44.659 +    unsigned long vtflags;
  44.660 +    if(!vtable) return;
  44.661 +    VarpTable_write_lock(vtable, vtflags);
  44.662 +    timer_cancel(&vtable->timer);
  44.663 +    vtable->timer.data = 0;
  44.664 +    if(vtable->table){
  44.665 +        HashTable *table = vtable->table;
  44.666 +        HashTable_for_decl(entry);
  44.667 +
  44.668 +        vtable->table = NULL;
  44.669 +        HashTable_for_each(entry, table){
  44.670 +            VarpEntry *ventry = entry->value;
  44.671 +            unsigned long flags;
  44.672 +            VarpEntry_lock(ventry, flags);
  44.673 +            atomic_set(&ventry->deleted, 1);
  44.674 +            if(VarpEntry_get_flags(ventry, VARP_FLAG_PROBING)){
  44.675 +                timer_cancel(&ventry->timer);
  44.676 +                ventry->timer.data = 0;
  44.677 +                VarpEntry_decref(ventry);
  44.678 +            }
  44.679 +            VarpEntry_unlock(ventry, flags);
  44.680 +        }
  44.681 +        HashTable_free(table); 
  44.682 +    }
  44.683 +    VarpTable_write_unlock(vtable, vtflags);
  44.684 +    deallocate(vtable);
  44.685  }
  44.686  
  44.687  /** Schedule the varp table timer.
  44.688   *
  44.689 - * @param z varp table
  44.690 + * @param vtable varp table
  44.691   */
  44.692 -void VarpTable_schedule(VarpTable *z){
  44.693 -    unsigned long now = jiffies;
  44.694 -    z->timer.expires = now + VARP_ENTRY_TTL;
  44.695 -    add_timer(&z->timer);
  44.696 +void VarpTable_schedule(VarpTable *vtable){
  44.697 +    timer_set(&vtable->timer, vtable->entry_ttl);
  44.698  }
  44.699  
  44.700  /** Function called when the varp table timer goes off.
  44.701 @@ -725,30 +811,30 @@ void VarpTable_schedule(VarpTable *z){
  44.702   * @param arg varp table
  44.703   */
  44.704  static void varp_table_timer_fn(unsigned long arg){
  44.705 -    VarpTable *z = (VarpTable *)arg;
  44.706 -    if(z){
  44.707 -        VarpTable_sweep(z, 0);
  44.708 -        VarpTable_schedule(z);
  44.709 +    VarpTable *vtable = (VarpTable *)arg;
  44.710 +    if(vtable){
  44.711 +        VarpTable_sweep(vtable);
  44.712 +        VarpTable_schedule(vtable);
  44.713      }
  44.714  }
  44.715  
  44.716  /** Print a varp table.
  44.717   *
  44.718 - * @param z table
  44.719 + * @param vtable table
  44.720   */
  44.721 -void VarpTable_print(VarpTable *z){
  44.722 +void VarpTable_print(VarpTable *vtable, IOStream *io){
  44.723      HashTable_for_decl(entry);
  44.724      VarpEntry *ventry;
  44.725 -    unsigned long flags, vflags;
  44.726 +    unsigned long vtflags, flags;
  44.727  
  44.728 -    VarpTable_read_lock(z, flags);
  44.729 -    HashTable_for_each(entry, varp_table->table){
  44.730 +    VarpTable_read_lock(vtable, vtflags);
  44.731 +    HashTable_for_each(entry, vtable->table){
  44.732          ventry = entry->value;
  44.733 -        VarpEntry_lock(ventry, vflags);
  44.734 -        VarpEntry_print(ventry);
  44.735 -        VarpEntry_unlock(ventry, vflags);
  44.736 +        VarpEntry_lock(ventry, flags);
  44.737 +        VarpEntry_print(ventry, io);
  44.738 +        VarpEntry_unlock(ventry, flags);
  44.739      }
  44.740 -    VarpTable_read_unlock(z, flags);
  44.741 +    VarpTable_read_unlock(vtable, vtflags);
  44.742  }
  44.743  
  44.744  /** Create a varp table.
  44.745 @@ -757,83 +843,140 @@ void VarpTable_print(VarpTable *z){
  44.746   */
  44.747  VarpTable * VarpTable_new(void){
  44.748      int err = -ENOMEM;
  44.749 -    VarpTable *z = NULL;
  44.750 +    VarpTable *vtable = NULL;
  44.751  
  44.752 -    z = ALLOCATE(VarpTable);
  44.753 -    if(!z) goto exit;
  44.754 -    z->table = HashTable_new(VARP_TABLE_BUCKETS);
  44.755 -    if(!z->table) goto exit;
  44.756 -    z->table->key_equal_fn = varp_key_equal_fn;
  44.757 -    z->table->key_hash_fn = varp_key_hash_fn;
  44.758 -    z->table->entry_free_fn = varp_entry_free_fn;
  44.759 -    init_MUTEX(&z->lock);
  44.760 -    init_timer(&z->timer);
  44.761 -    z->timer.data = (unsigned long)z;
  44.762 -    z->timer.function = varp_table_timer_fn;
  44.763 -    VarpTable_schedule(z);
  44.764 +    vtable = ALLOCATE(VarpTable);
  44.765 +    if(!vtable) goto exit;
  44.766 +    vtable->table = HashTable_new(VARP_TABLE_BUCKETS);
  44.767 +    if(!vtable->table) goto exit;
  44.768 +    vtable->table->key_equal_fn = varp_key_equal_fn;
  44.769 +    vtable->table->key_hash_fn = varp_key_hash_fn;
  44.770 +    vtable->table->entry_free_fn = varp_entry_free_fn;
  44.771 +
  44.772 +    vtable->entry_ttl = VARP_ENTRY_TTL;
  44.773 +    vtable->probe_max = VARP_PROBE_MAX;
  44.774 +    vtable->probe_interval = VARP_PROBE_INTERVAL;
  44.775 +    vtable->queue_max = VARP_QUEUE_MAX;
  44.776 +
  44.777 +    init_MUTEX(&vtable->mutex);
  44.778 +    vtable->lock = RW_LOCK_UNLOCKED;
  44.779 +    timer_init(&vtable->timer, varp_table_timer_fn, vtable);
  44.780      err = 0;
  44.781    exit:
  44.782      if(err){
  44.783 -        VarpTable_free(z);
  44.784 -        z = NULL;
  44.785 +        VarpTable_free(vtable);
  44.786 +        vtable = NULL;
  44.787      }
  44.788 -    return z;
  44.789 +    return vtable;
  44.790  }
  44.791  
  44.792  /** Add a new entry to the varp table.
  44.793   *
  44.794 - * @param z table
  44.795 + * @param vtable table
  44.796   * @param vnet vnet id
  44.797   * @param vmac virtual MAC address (copied)
  44.798   * @return new entry or null
  44.799   */
  44.800 -VarpEntry * VarpTable_add(VarpTable *z, VnetId *vnet, Vmac *vmac){
  44.801 -    int err = -ENOMEM;
  44.802 -    VarpEntry *ventry;
  44.803 -    HTEntry *entry;
  44.804 -    unsigned long flags;
  44.805 +VarpEntry * VarpTable_add(VarpTable *vtable, VnetId *vnet, Vmac *vmac){
  44.806 +    int err = 0;
  44.807 +    VarpKey key = { .vnet = *vnet, .vmac = *vmac};
  44.808 +    VarpEntry *ventry = NULL;
  44.809 +    HTEntry *entry = NULL;
  44.810 +    unsigned long vtflags;
  44.811  
  44.812 +    VarpTable_write_lock(vtable, vtflags);
  44.813 +    ventry = HashTable_get(vtable->table, &key);
  44.814 +    if(ventry){
  44.815 +        VarpEntry_incref(ventry);
  44.816 +        goto exit;
  44.817 +    }
  44.818 +    err = -ENOMEM;
  44.819      ventry = VarpEntry_new(vnet, vmac);
  44.820      if(!ventry) goto exit;
  44.821 -    VarpTable_write_lock(z, flags);
  44.822 -    entry = HashTable_add(z->table, ventry, ventry);
  44.823 -    VarpTable_write_unlock(z, flags);
  44.824 -    if(!entry) goto exit;
  44.825 +    entry = HashTable_add(vtable->table, ventry, ventry);
  44.826 +    if(!entry){
  44.827 +        VarpEntry_decref(ventry);
  44.828 +        ventry = NULL;
  44.829 +        goto exit;
  44.830 +    }
  44.831 +    err = 0;
  44.832      VarpEntry_incref(ventry);
  44.833 -    err = 0;
  44.834    exit:
  44.835 -    if(err){
  44.836 -        VarpEntry_free(ventry);
  44.837 -        ventry = NULL;
  44.838 -    }
  44.839 +    VarpTable_write_unlock(vtable, vtflags);
  44.840      return ventry;
  44.841  }
  44.842  
  44.843  /** Remove an entry from the varp table.
  44.844   *
  44.845 - * @param z table
  44.846 + * @param vtable table
  44.847   * @param ventry entry to remove
  44.848   * @return removed count
  44.849   */
  44.850 -int VarpTable_remove(VarpTable *z, VarpEntry *ventry){
  44.851 -    return HashTable_remove(z->table, ventry);
  44.852 +int VarpTable_remove(VarpTable *vtable, VarpEntry *ventry){
  44.853 +    //TODO: Could send a varp announce with null addr for the entry
  44.854 +    // vnet and vmac to notify others, so they will resolve the addr
  44.855 +    // instead of sending traffic to us.
  44.856 +    atomic_set(&ventry->deleted, 1);
  44.857 +    skb_queue_purge(&ventry->queue);
  44.858 +    return HashTable_remove(vtable->table, ventry);
  44.859 +}
  44.860 +
  44.861 +/** Remove all entries using a vnet.
  44.862 + * Caller must hold the table lock.
  44.863 + *
  44.864 + * @param vtable table
  44.865 + * @param vnet vnet
  44.866 + * @return removed count
  44.867 + */
  44.868 +int VarpTable_remove_vnet(VarpTable *vtable, VnetId *vnet){
  44.869 +    int count = 0;
  44.870 +    HashTable_for_decl(entry);
  44.871 +
  44.872 +    HashTable_for_each(entry, vtable->table){
  44.873 +        VarpEntry *ventry = entry->value;
  44.874 +        if(VnetId_eq(&ventry->key.vnet, vnet)){
  44.875 +            count += VarpTable_remove(vtable, ventry);
  44.876 +        }
  44.877 +    }
  44.878 +    return count;
  44.879 +}
  44.880 +
  44.881 +/** Remove all entries using a vnet from the varp table.
  44.882 + *
  44.883 + * @param vnet vnet
  44.884 + * @return removed count
  44.885 + */
  44.886 +int varp_remove_vnet(VnetId *vnet){
  44.887 +    int count = 0;
  44.888 +    unsigned long vtflags;
  44.889 +
  44.890 +    VarpTable_write_lock(varp_table, vtflags);
  44.891 +    count = VarpTable_remove_vnet(varp_table, vnet);
  44.892 +    VarpTable_write_unlock(varp_table, vtflags);
  44.893 +    return count;
  44.894  }
  44.895  
  44.896  /** Lookup an entry in the varp table.
  44.897   *
  44.898 - * @param z table
  44.899 + * @param vtable table
  44.900   * @param vnet vnet id
  44.901 - * @param vmac virtual MAC addres
  44.902 + * @param vmac virtual MAC address
  44.903 + * @param create if true, create a new entry when none is found
  44.904   * @return entry found or null
  44.905   */
  44.906 -VarpEntry * VarpTable_lookup(VarpTable *z, VnetId *vnet, Vmac *vmac){
  44.907 -    unsigned long flags;
  44.908 +VarpEntry * VarpTable_lookup(VarpTable *vtable, VnetId *vnet, Vmac *vmac, int create){
  44.909      VarpKey key = { .vnet = *vnet, .vmac = *vmac };
  44.910 -    VarpEntry *ventry;
  44.911 -    VarpTable_read_lock(z, flags);
  44.912 -    ventry = HashTable_get(z->table, &key);
  44.913 +    VarpEntry *ventry = NULL;
  44.914 +    unsigned long vtflags;
  44.915 +
  44.916 +    VarpTable_read_lock(vtable, vtflags);
  44.917 +    ventry = HashTable_get(vtable->table, &key);
  44.918      if(ventry) VarpEntry_incref(ventry);
  44.919 -    VarpTable_read_unlock(z, flags);
  44.920 +    VarpTable_read_unlock(vtable, vtflags);
  44.921 +
  44.922 +    if(!ventry && create){
  44.923 +        ventry = VarpTable_add(vtable, vnet, vmac);
  44.924 +    }
  44.925      return ventry;
  44.926  }
  44.927  
  44.928 @@ -849,11 +992,13 @@ int VarpEntry_send(VarpEntry *ventry, st
  44.929      int err = 0;
  44.930      unsigned long flags = 0;
  44.931      VarpAddr addr;
  44.932 +    VnetId vnet;
  44.933  
  44.934      dprintf("> skb=%p\n", skb);
  44.935 +    vnet = ventry->key.vnet;
  44.936      addr = ventry->addr;
  44.937      VarpEntry_unlock(ventry, flags);
  44.938 -    err = vnet_tunnel_send(&ventry->key.vnet, &addr, skb);
  44.939 +    err = vnet_tunnel_send(&vnet, &addr, skb);
  44.940      VarpEntry_lock(ventry, flags);
  44.941      dprintf("< err=%d\n", err);
  44.942      return err;
  44.943 @@ -872,6 +1017,8 @@ int VarpEntry_send(VarpEntry *ventry, st
  44.944  int VarpEntry_resolve(VarpEntry *ventry, struct sk_buff *skb){
  44.945      int err = 0;
  44.946      unsigned long flags = 0;
  44.947 +    VnetId vnet;
  44.948 +    Vmac vmac;
  44.949  
  44.950      dprintf("> skb=%p\n", skb);
  44.951      ventry->state = VARP_STATE_INCOMPLETE;
  44.952 @@ -881,19 +1028,23 @@ int VarpEntry_resolve(VarpEntry *ventry,
  44.953          VarpEntry_incref(ventry);
  44.954          VarpEntry_schedule(ventry);
  44.955      }
  44.956 +    vnet = ventry->key.vnet;
  44.957 +    vmac = *(Vmac*)eth_hdr(skb)->h_dest;
  44.958      VarpEntry_unlock(ventry, flags);
  44.959 -    varp_solicit(skb, &ventry->key.vnet);
  44.960 +    varp_solicit(&vnet, &vmac);
  44.961      VarpEntry_lock(ventry, flags);
  44.962  
  44.963      if(ventry->state == VARP_STATE_INCOMPLETE){
  44.964 -        if(skb_queue_len(&ventry->queue) >= ventry->queue_max){
  44.965 +        while(skb_queue_len(&ventry->queue) >= ventry->queue_max){
  44.966              struct sk_buff *oldskb;
  44.967 -            oldskb = ventry->queue.next;
  44.968 -            __skb_unlink(oldskb, &ventry->queue);
  44.969 +            oldskb = skb_dequeue(&ventry->queue);
  44.970 +            //oldskb = ventry->queue.next;
  44.971 +            //__skb_unlink(oldskb, &ventry->queue);
  44.972 +            if(!oldskb) break;
  44.973              dprintf("> dropping skb=%p\n", oldskb);
  44.974              kfree_skb(oldskb);
  44.975          }
  44.976 -        __skb_queue_tail(&ventry->queue, skb);
  44.977 +        skb_queue_tail(&ventry->queue, skb);
  44.978      } else {
  44.979          err = VarpEntry_send(ventry, skb);
  44.980      }
  44.981 @@ -901,27 +1052,6 @@ int VarpEntry_resolve(VarpEntry *ventry,
  44.982      return err;
  44.983  }
  44.984  
  44.985 -/** Handle output for a ventry. Resolves the ventry
  44.986 - * if necessary.
  44.987 - *
  44.988 - * @param ventry varp entry
  44.989 - * @param skb skb to send
  44.990 - * @return 0 on success, error code otherwise
  44.991 - */
  44.992 -int VarpEntry_output(VarpEntry *ventry, struct sk_buff *skb){
  44.993 -    int err = 0;
  44.994 -
  44.995 -    switch(ventry->state){
  44.996 -    case VARP_STATE_REACHABLE:
  44.997 -        err = VarpEntry_send(ventry, skb);
  44.998 -        break;
  44.999 -    default:
 44.1000 -        err = VarpEntry_resolve(ventry, skb);
 44.1001 -        break;
 44.1002 -    }
 44.1003 -    return err;
 44.1004 -}
 44.1005 -
 44.1006  /** Process the output queue for a ventry.  Sends the queued skbs if
 44.1007   * the ventry is reachable, otherwise drops them.
 44.1008   *
 44.1009 @@ -931,13 +1061,60 @@ void VarpEntry_process_queue(VarpEntry *
 44.1010      struct sk_buff *skb;
 44.1011      for( ; ; ){
 44.1012          if(ventry->state != VARP_STATE_REACHABLE) break;
 44.1013 -        skb = __skb_dequeue(&ventry->queue);
 44.1014 +        skb = skb_dequeue(&ventry->queue);
 44.1015          if(!skb) break;
 44.1016 -        VarpEntry_output(ventry, skb);
 44.1017 +        VarpEntry_send(ventry, skb);
 44.1018      }
 44.1019      skb_queue_purge(&ventry->queue);
 44.1020  }
 44.1021  
 44.1022 +/** Multicast an skb on a vnet.
 44.1023 + *
 44.1024 + * @param vnet vnet id
 44.1025 + * @param skb skb to send
 44.1026 + * @return 0 on success, error code otherwise
 44.1027 + */
 44.1028 +static int varp_multicast(VnetId *vnet, struct sk_buff *skb){
 44.1029 +    VarpAddr addr = { .family = AF_INET };
 44.1030 +    addr.u.ip4.s_addr = varp_mcast_addr;
 44.1031 +    return vnet_tunnel_send(vnet, &addr, skb);
 44.1032 +}
 44.1033 +
 44.1034 +/** Handle output for a ventry. Resolves the ventry
 44.1035 + * if necessary.
 44.1036 + *
 44.1037 + * @param ventry varp entry
 44.1038 + * @param skb skb to send
 44.1039 + * @return 0 on success, error code otherwise
 44.1040 + */
 44.1041 +int VarpEntry_output(VarpEntry *ventry, struct sk_buff *skb){
 44.1042 +    int err = 0;
 44.1043 +    unsigned long flags;
 44.1044 +
 44.1045 +    VarpEntry_lock(ventry, flags);
 44.1046 +    switch(ventry->state){
 44.1047 +    case VARP_STATE_REACHABLE:
 44.1048 +        if(skb_queue_len(&ventry->queue) > 0){
 44.1049 +            VarpEntry_process_queue(ventry);
 44.1050 +        }
 44.1051 +        err = VarpEntry_send(ventry, skb);
 44.1052 +        break;
 44.1053 +    default: 
 44.1054 +        if(0){
 44.1055 +            err = VarpEntry_resolve(ventry, skb);
 44.1056 +        } else {     
 44.1057 +            // Multicast the skb if the entry is not reachable.
 44.1058 +            VnetId vnet = ventry->key.vnet;
 44.1059 +            VarpEntry_unlock(ventry, flags);
 44.1060 +            err = varp_multicast(&vnet, skb);
 44.1061 +            VarpEntry_lock(ventry, flags);
 44.1062 +        }
 44.1063 +        break;
 44.1064 +    }
 44.1065 +    VarpEntry_unlock(ventry, flags);
 44.1066 +    return err;
 44.1067 +}
 44.1068 +
 44.1069  /** Update a ventry. Sets the address and state to those given
 44.1070   * and sets the timestamp to 'now'.
 44.1071   *
 44.1072 @@ -946,132 +1123,155 @@ void VarpEntry_process_queue(VarpEntry *
 44.1073   * @param state state
 44.1074   * @return 0 on success, error code otherwise
 44.1075   */
 44.1076 -int VarpEntry_update(VarpEntry *ventry, VarpAddr *addr, int state){
 44.1077 +int VarpEntry_update(VarpEntry *ventry, VarpAddr *addr, int state, int vflags){
 44.1078      int err = 0;
 44.1079      unsigned long now = jiffies;
 44.1080      unsigned long flags;
 44.1081  
 44.1082 -    dprintf("> addr=" IPFMT " state=%d\n", NIPQUAD(addr), state);
 44.1083      VarpEntry_lock(ventry, flags);
 44.1084 +    //if(atomic_read(&ventry->deleted)) goto exit;
 44.1085      if(VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT)) goto exit;
 44.1086      ventry->addr = *addr;
 44.1087      ventry->timestamp = now;
 44.1088      ventry->state = state;
 44.1089 -    VarpEntry_process_queue(ventry);
 44.1090 +    // Can't process the queue while atomic as it calls schedule(),
 44.1091 +    // and that's bad.
 44.1092 +    //if(0 && (vflags & VARP_UPDATE_QUEUE) && !in_atomic()){
 44.1093 +    //    VarpEntry_process_queue(ventry);
 44.1094 +    //}
 44.1095    exit:
 44.1096      VarpEntry_unlock(ventry, flags);
 44.1097      dprintf("< err=%d\n", err);
 44.1098      return err;
 44.1099  }
 44.1100      
 44.1101 -int VarpTable_update(VarpTable *z, VnetId *vnet, Vmac *vmac, VarpAddr *addr,
 44.1102 -                     int state, int force){
 44.1103 +/** Update the entry for a vmac on a vnet.
 44.1104 + *
 44.1105 + * @param vtable varp table
 44.1106 + * @param vnet vnet id
 44.1107 + * @param vmac mac address
 44.1108 + * @param addr care-of-address
 44.1109 + * @param state state
 44.1110 + * @param flags update flags
 44.1111 + * @return 0 on success, error code otherwise
 44.1112 + */
 44.1113 +int VarpTable_update(VarpTable *vtable, VnetId *vnet, Vmac *vmac, VarpAddr *addr,
 44.1114 +                     int state, int flags){
 44.1115      int err = 0;
 44.1116      VarpEntry *ventry;
 44.1117  #ifdef DEBUG
 44.1118      char vnetbuf[VNET_ID_BUF];
 44.1119      char addrbuf[VARP_ADDR_BUF];
 44.1120 -#endif
 44.1121      
 44.1122 -    dprintf("> vnet=%s mac=" MACFMT " addr=%s state=%d force=%d\n",
 44.1123 +    dprintf("> vnet=%s mac=" MACFMT " addr=%s state=%d flags=%x\n",
 44.1124              VnetId_ntoa(vnet, vnetbuf),
 44.1125              MAC6TUPLE(vmac->mac),
 44.1126              VarpAddr_ntoa(addr, addrbuf),
 44.1127              state,
 44.1128 -            force);
 44.1129 -    ventry = VarpTable_lookup(z, vnet, vmac);
 44.1130 -    if(force && !ventry){
 44.1131 -        dprintf("> No entry, adding\n");
 44.1132 -        ventry = VarpTable_add(z, vnet, vmac);
 44.1133 +            flags);
 44.1134 +#endif
 44.1135 +    ventry = VarpTable_lookup(vtable, vnet, vmac, (flags & VARP_UPDATE_CREATE));
 44.1136 +    if(!ventry){
 44.1137 +        err = -ENOENT;
 44.1138 +        goto exit;
 44.1139      }
 44.1140 -    if(ventry){
 44.1141 -        dprintf("> Updating\n");
 44.1142 -        err = VarpEntry_update(ventry, addr, state);
 44.1143 -        VarpEntry_decref(ventry);
 44.1144 -    } else {
 44.1145 -        dprintf("> No entry found\n");
 44.1146 -        err = -ENOENT;
 44.1147 -    }
 44.1148 +    err = VarpEntry_update(ventry, addr, state, flags);
 44.1149 +    VarpEntry_decref(ventry);
 44.1150 +  exit:
 44.1151      dprintf("< err=%d\n", err);
 44.1152      return err;
 44.1153  }
 44.1154  
 44.1155 -/** Update the ventry corresponding to the given varp header.
 44.1156 +/** Update the entry for a vnet: make it reachable and create an entry
 44.1157 + * if needed.
 44.1158   *
 44.1159 - * @param z table
 44.1160 - * @param varph varp header
 44.1161 - * @param state state
 44.1162 - * @return 0 on success, -ENOENT if no entry found
 44.1163 + * @param vnet vnet id
 44.1164 + * @param vmac mac address
 44.1165 + * @param addr care-of-address
 44.1166 + * @return 0 on success, error code otherwise
 44.1167   */
 44.1168 -int VarpTable_update_entry(VarpTable *z, VarpHdr *varph, int state){
 44.1169 -    return VarpTable_update(z, &varph->vnet, &varph->vmac, &varph->addr, state, 0);
 44.1170 +int varp_update(VnetId *vnet, unsigned char *vmac, VarpAddr *addr){
 44.1171 +    int err = 0;
 44.1172 +    if(!varp_table){
 44.1173 +        err = -ENOSYS;
 44.1174 +    } else {
 44.1175 +        err = VarpTable_update(varp_table, vnet, (Vmac*)vmac, addr,
 44.1176 +                               VARP_STATE_REACHABLE, VARP_UPDATE_CREATE);
 44.1177 +    }
 44.1178 +    return err;
 44.1179  }
 44.1180  
 44.1181 -int varp_update(VnetId *vnet, unsigned char *vmac, VarpAddr *addr){
 44.1182 -    if(!varp_table){
 44.1183 -        return -ENOSYS;
 44.1184 -    }
 44.1185 -    return VarpTable_update(varp_table, vnet, (Vmac*)vmac, addr,
 44.1186 -                            VARP_STATE_REACHABLE, 1);
 44.1187 +static inline int VarpEntry_sweepable(VarpEntry *ventry){
 44.1188 +    return !VarpEntry_get_flags(ventry, (VARP_FLAG_PERMANENT | VARP_FLAG_PROBING));
 44.1189  }
 44.1190  
 44.1191 -/** Put old varp entries into the incomplete state.
 44.1192 - * Permanent entries are not changed.
 44.1193 - * If 'all' is non-zero, all non-permanent entries
 44.1194 - * are put into the incomplete state, regardless of age.
 44.1195 +static inline int VarpTable_old(VarpTable *vtable, VarpEntry *ventry, unsigned long now){
 44.1196 +    return now - ventry->timestamp > vtable->entry_ttl;
 44.1197 +}
 44.1198 +
 44.1199 +/** Sweep old varp entries.
 44.1200 + * Doesn't affect entries that are probing or permanent.
 44.1201   *
 44.1202 - * @param z table
 44.1203 - * @param all reset all entries if non-zero
 44.1204 + * @param vtable table
 44.1205   */
 44.1206 -void VarpTable_sweep(VarpTable *z, int all){
 44.1207 +void VarpTable_sweep(VarpTable *vtable){
 44.1208      HashTable_for_decl(entry);
 44.1209      VarpEntry *ventry;
 44.1210      unsigned long now = jiffies;
 44.1211 -    unsigned long old = now - VARP_ENTRY_TTL;
 44.1212 -    unsigned long flags, vflags;
 44.1213 +    unsigned long vtflags, flags;
 44.1214 +    int sweep, swept = 0;
 44.1215  
 44.1216 -    VarpTable_read_lock(z, flags);
 44.1217 -    HashTable_for_each(entry, varp_table->table){
 44.1218 +    if(!vtable) return;
 44.1219 +    VarpTable_write_lock(vtable, vtflags);
 44.1220 +    HashTable_for_each(entry, vtable->table){
 44.1221          ventry = entry->value;
 44.1222 -        VarpEntry_lock(ventry, vflags);
 44.1223 -        if(!VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT) &&
 44.1224 -           (all || (ventry->timestamp < old))){
 44.1225 -            VarpEntry_process_queue(ventry);
 44.1226 +        VarpEntry_lock(ventry, flags);
 44.1227 +        sweep = VarpEntry_sweepable(ventry) && VarpTable_old(vtable, ventry, now);
 44.1228 +        if(sweep){
 44.1229 +            swept++;
 44.1230 +            iprintf("> Sweeping:\n");
 44.1231 +            VarpEntry_print(ventry, iostdout);
 44.1232 +            //VarpEntry_process_queue(ventry);
 44.1233              ventry->state = VARP_STATE_INCOMPLETE;
 44.1234          }
 44.1235 -        VarpEntry_unlock(ventry, vflags);
 44.1236 +        VarpEntry_unlock(ventry, flags);
 44.1237 +        if(sweep){
 44.1238 +            VarpTable_remove(vtable, ventry);
 44.1239 +        }
 44.1240      }
 44.1241 -    VarpTable_read_unlock(z, flags);
 44.1242 +    VarpTable_write_unlock(vtable, vtflags);
 44.1243 +    if(swept){
 44.1244 +        iprintf(">\n");
 44.1245 +        varp_print(iostdout);
 44.1246 +    }
 44.1247  }
 44.1248  
 44.1249  /** Flush the varp table.
 44.1250 - * Remove old unreachable varp entries with empty queues.
 44.1251 - * Permanent entries are not removed.
 44.1252   *
 44.1253 - * @param z table
 44.1254 + * @param vtable table
 44.1255   */
 44.1256 -void VarpTable_flush(VarpTable *z){
 44.1257 +void VarpTable_flush(VarpTable *vtable){
 44.1258      HashTable_for_decl(entry);
 44.1259      VarpEntry *ventry;
 44.1260 -    unsigned long now = jiffies;
 44.1261 -    unsigned long old = now - VARP_ENTRY_TTL;
 44.1262 -    unsigned long flags, vflags;
 44.1263 +    unsigned long vtflags, flags;
 44.1264      int flush;
 44.1265  
 44.1266 -    VarpTable_write_lock(z, flags);
 44.1267 -    HashTable_for_each(entry, varp_table->table){
 44.1268 +    VarpTable_write_lock(vtable, vtflags);
 44.1269 +    HashTable_for_each(entry, vtable->table){
 44.1270          ventry = entry->value;
 44.1271 -        VarpEntry_lock(ventry, vflags);
 44.1272 +        VarpEntry_lock(ventry, flags);
 44.1273          flush = (!VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT) &&
 44.1274 -                 (ventry->timestamp < old) &&
 44.1275 -                 (ventry->state != VARP_STATE_REACHABLE) &&
 44.1276 -                 (skb_queue_len(&ventry->queue) == 0));
 44.1277 -        VarpEntry_unlock(ventry, vflags);
 44.1278 +                 !VarpEntry_get_flags(ventry, VARP_FLAG_PROBING));                
 44.1279          if(flush){
 44.1280 -            VarpTable_remove(z, ventry);
 44.1281 +            iprintf("> Flushing:\n");
 44.1282 +            VarpEntry_print(ventry, iostdout);
 44.1283 +        }
 44.1284 +        VarpEntry_unlock(ventry, flags);
 44.1285 +        if(flush){
 44.1286 +            VarpTable_remove(vtable, ventry);
 44.1287          }
 44.1288      }
 44.1289 -    VarpTable_write_unlock(z, flags);
 44.1290 +    VarpTable_write_unlock(vtable, vtflags);
 44.1291  }
 44.1292  
 44.1293  /** Handle a varp request. Look for a vif with the requested 
 44.1294 @@ -1129,7 +1329,10 @@ int varp_handle_announce(struct sk_buff 
 44.1295      int err = 0;
 44.1296  
 44.1297      dprintf(">\n");
 44.1298 -    err = VarpTable_update_entry(varp_table, varph, VARP_STATE_REACHABLE);
 44.1299 +    err = VarpTable_update(varp_table,
 44.1300 +                           &varph->vnet, &varph->vmac, &varph->addr,
 44.1301 +                           VARP_STATE_REACHABLE, 
 44.1302 +                           (VARP_UPDATE_CREATE | VARP_UPDATE_QUEUE));
 44.1303      dprintf("< err=%d\n", err);
 44.1304      return err;
 44.1305  }
 44.1306 @@ -1140,33 +1343,51 @@ int varp_handle_announce(struct sk_buff 
 44.1307   * @return 0 if OK, error code otherwise
 44.1308   */
 44.1309  int varp_handle_message(struct sk_buff *skb){
 44.1310 -    // Assume h. nh set, skb->data point after udp hdr (at varphdr).
 44.1311 -    int err = -EINVAL, mine = 0;
 44.1312 -    VarpHdr *varph = (void*)(skb->h.uh + 1);
 44.1313 +    // Assume nh, h set, skb->data points at udp hdr (h).
 44.1314 +    int err = -EINVAL;
 44.1315 +    VarpHdr *varph; // = (void*)(skb->h.uh + 1);
 44.1316  
 44.1317 -    dprintf(">\n");
 44.1318 +    dprintf("> skb=%p saddr=" IPFMT " daddr=" IPFMT "\n",
 44.1319 +            skb,
 44.1320 +            NIPQUAD(skb->nh.iph->saddr),
 44.1321 +            NIPQUAD(skb->nh.iph->daddr));
 44.1322      if(!varp_table){
 44.1323          err = -ENOSYS;
 44.1324 +        return err;
 44.1325 +    }
 44.1326 +    if(MULTICAST(skb->nh.iph->daddr)){
 44.1327 +        if(skb->nh.iph->daddr != varp_mcast_addr){
 44.1328 +            // Ignore multicast packets not addressed to us.
 44.1329 +            err = 0;
 44.1330 +            dprintf("> Ignoring daddr=" IPFMT " mcaddr=" IPFMT "\n",
 44.1331 +                    NIPQUAD(skb->nh.iph->daddr), NIPQUAD(varp_mcast_addr));
 44.1332 +            goto exit;
 44.1333 +        }
 44.1334 +    }
 44.1335 +    varph = (void*)skb_pull(skb, sizeof(struct udphdr));
 44.1336 +    if(skb->len < sizeof(struct VnetMsgHdr)){
 44.1337 +        wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(struct VnetMsgHdr));
 44.1338          goto exit;
 44.1339      }
 44.1340 -    if(MULTICAST(skb->nh.iph->daddr) &&
 44.1341 -       (skb->nh.iph->daddr != varp_mcast_addr)){
 44.1342 -        // Ignore multicast packets not addressed to us.
 44.1343 -        err = 0;
 44.1344 -        dprintf("> Ignoring daddr=" IPFMT " mcaddr=" IPFMT "\n",
 44.1345 -                NIPQUAD(skb->nh.iph->daddr), NIPQUAD(varp_mcast_addr));
 44.1346 +    switch(ntohs(varph->hdr.id)){
 44.1347 +    case VARP_ID: // Varp message. Handled below.
 44.1348 +        if(skb->len < sizeof(*varph)){
 44.1349 +            wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(*varph));
 44.1350 +            goto exit;
 44.1351 +        }
 44.1352 +        break;
 44.1353 +    case VUDP_ID: // Etherip-in-udp packet.
 44.1354 +        skb_pull(skb, sizeof(struct VnetMsgHdr));
 44.1355 +        err = etherip_protocol_recv(skb);
 44.1356          goto exit;
 44.1357 -    }
 44.1358 -    if(skb->len < sizeof(*varph)){
 44.1359 -        wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(*varph));
 44.1360 +    case VFWD_ID: // Forwarded.
 44.1361 +        skb_pull(skb, sizeof(struct VnetMsgHdr));
 44.1362 +        err = vnet_forward_recv(skb);
 44.1363          goto exit;
 44.1364 -    }
 44.1365 -    mine = 1;
 44.1366 -    if(varph->hdr.id != htons(VARP_ID)){
 44.1367 +    default:
 44.1368          // It's not varp at all - ignore it.
 44.1369 -        wprintf("> Invalid varp id: %d, expected %d \n",
 44.1370 -                ntohs(varph->hdr.id),
 44.1371 -                VARP_ID);
 44.1372 +        wprintf("> Invalid varp id: %d\n", ntohs(varph->hdr.id));
 44.1373 +        print_skb("INVALID", 0, skb);
 44.1374          goto exit;
 44.1375      }
 44.1376  #ifdef DEBUG
 44.1377 @@ -1196,7 +1417,6 @@ int varp_handle_message(struct sk_buff *
 44.1378          break;
 44.1379      }
 44.1380    exit:
 44.1381 -    if(mine) err = 1;
 44.1382      dprintf("< err=%d\n", err);
 44.1383      return err;
 44.1384  }
 44.1385 @@ -1212,8 +1432,11 @@ int varp_output(struct sk_buff *skb, Vne
 44.1386      unsigned char *mac = NULL;
 44.1387      Vmac *vmac = NULL;
 44.1388      VarpEntry *ventry = NULL;
 44.1389 +#if defined(DEBUG)
 44.1390 +    char vnetbuf[VNET_ID_BUF];
 44.1391 +#endif
 44.1392  
 44.1393 -    dprintf(">\n");
 44.1394 +    dprintf("> vnet=%s\n", VnetId_ntoa(vnet, vnetbuf));
 44.1395      if(!varp_table){
 44.1396          err = -ENOSYS;
 44.1397          goto exit;
 44.1398 @@ -1226,20 +1449,11 @@ int varp_output(struct sk_buff *skb, Vne
 44.1399      mac = eth_hdr(skb)->h_dest;
 44.1400      vmac = (Vmac*)mac;
 44.1401      if(mac_is_multicast(mac)){
 44.1402 -        VarpAddr addr = {};
 44.1403 -        addr.family = AF_INET;
 44.1404 -        addr.u.ip4.s_addr = varp_mcast_addr;
 44.1405 -        err = vnet_tunnel_send(vnet, &addr, skb);
 44.1406 +        err = varp_multicast(vnet, skb);
 44.1407      } else {
 44.1408 -        ventry = VarpTable_lookup(varp_table, vnet, vmac);
 44.1409 -        if(!ventry){
 44.1410 -            ventry = VarpTable_add(varp_table, vnet, vmac);
 44.1411 -        }
 44.1412 +        ventry = VarpTable_lookup(varp_table, vnet, vmac, 1);
 44.1413          if(ventry){
 44.1414 -            unsigned long flags;
 44.1415 -            VarpEntry_lock(ventry, flags);
 44.1416              err = VarpEntry_output(ventry, skb);
 44.1417 -            VarpEntry_unlock(ventry, flags);
 44.1418              VarpEntry_decref(ventry);
 44.1419          } else {
 44.1420              err = -ENOMEM;
 44.1421 @@ -1292,6 +1506,7 @@ int varp_init(void){
 44.1422          err = -ENOMEM;
 44.1423          goto exit;
 44.1424      }
 44.1425 +    VarpTable_schedule(varp_table);
 44.1426      varp_init_mcast_addr(varp_mcaddr);
 44.1427      varp_port = htons(VARP_PORT);
 44.1428  
 44.1429 @@ -1307,9 +1522,9 @@ void varp_exit(void){
 44.1430      dprintf(">\n");
 44.1431      varp_close();
 44.1432      if(varp_table){
 44.1433 -        VarpTable *z = varp_table;
 44.1434 +        VarpTable *vtable = varp_table;
 44.1435          varp_table = NULL;
 44.1436 -        VarpTable_free(z);
 44.1437 +        VarpTable_free(vtable);
 44.1438      }
 44.1439      dprintf("<\n");
 44.1440  }
    45.1 --- a/tools/vnet/vnet-module/varp.h	Thu Feb 09 16:09:00 2006 +0100
    45.2 +++ b/tools/vnet/vnet-module/varp.h	Thu Feb 09 16:12:11 2006 +0100
    45.3 @@ -1,5 +1,5 @@
    45.4  /*
    45.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    45.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    45.7   *
    45.8   * This program is free software; you can redistribute it and/or modify
    45.9   * it under the terms of the GNU General Public License as published by the 
   45.10 @@ -19,23 +19,38 @@
   45.11  
   45.12  #ifndef _VNET_VARP_H
   45.13  #define _VNET_VARP_H
   45.14 +
   45.15 +#ifdef __KERNEL__
   45.16 +
   45.17 +#else
   45.18 +
   45.19 +#include "sys_kernel.h"
   45.20 +
   45.21 +#endif
   45.22 +
   45.23  #include "hash_table.h"
   45.24  #include "if_varp.h"
   45.25  #include "varp_util.h"
   45.26  
   45.27 -
   45.28  #define CONFIG_VARP_GRATUITOUS 1
   45.29  
   45.30  struct net_device;
   45.31  struct sk_buff;
   45.32  struct Vif;
   45.33  
   45.34 +enum {
   45.35 +    VARP_UPDATE_CREATE = 1,
   45.36 +    VARP_UPDATE_QUEUE  = 2,
   45.37 +};
   45.38 +
   45.39  extern int vnet_get_device(const char *name, struct net_device **dev);
   45.40  extern int vnet_get_device_address(struct net_device *dev, u32 *addr);
   45.41  
   45.42 +extern int varp_remove_vnet(struct VnetId *vnet);
   45.43  extern int varp_handle_message(struct sk_buff *skb);
   45.44  extern int varp_output(struct sk_buff *skb, struct VnetId *vnet);
   45.45 -extern int varp_update(struct VnetId *vnet, unsigned char *vmac, struct VarpAddr *addr);
   45.46 +extern int varp_update(struct VnetId *vnet, unsigned char *vmac,
   45.47 +                       struct VarpAddr *addr);
   45.48  
   45.49  extern int varp_init(void);
   45.50  extern void varp_exit(void);
   45.51 @@ -44,12 +59,13 @@ extern int varp_open(u32 mcaddr, u16 por
   45.52  extern void varp_close(void);
   45.53  extern int varp_set_mcast_addr(u32 addr);
   45.54  
   45.55 -extern void varp_print(void);
   45.56 +extern void varp_print(struct IOStream *io);
   45.57  extern void varp_flush(void);
   45.58  
   45.59  extern int varp_announce_vif(struct net_device *dev, struct Vif *vif);
   45.60  
   45.61  extern u32 varp_mcast_addr;
   45.62 +extern u16 varp_port;
   45.63  
   45.64  /* MAC broadcast addr is ff-ff-ff-ff-ff-ff (all 1's).
   45.65   * MAC multicast addr has low bit 1, i.e. 01-00-00-00-00-00.
    46.1 --- a/tools/vnet/vnet-module/varp_socket.c	Thu Feb 09 16:09:00 2006 +0100
    46.2 +++ b/tools/vnet/vnet-module/varp_socket.c	Thu Feb 09 16:12:11 2006 +0100
    46.3 @@ -1,5 +1,5 @@
    46.4  /*
    46.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    46.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    46.7   *
    46.8   * This program is free software; you can redistribute it and/or modify
    46.9   * it under the terms of the GNU General Public License as published by the 
   46.10 @@ -23,6 +23,7 @@
   46.11  #include <asm/uaccess.h>
   46.12  #include <linux/net.h>
   46.13  #include <linux/in.h>
   46.14 +#include <linux/ip.h>
   46.15  #include <linux/sched.h>
   46.16  #include <linux/file.h>
   46.17  #include <linux/version.h>
   46.18 @@ -31,6 +32,7 @@
   46.19  
   46.20  #include <if_varp.h>
   46.21  #include <varp.h>
   46.22 +#include <vnet_forward.h>
   46.23  
   46.24  /* Get macros needed to define system calls as functions in the kernel. */
   46.25  #define __KERNEL_SYSCALLS__
   46.26 @@ -42,30 +44,33 @@ static int errno;
   46.27  #undef DEBUG
   46.28  #include "debug.h"
   46.29  
   46.30 -// Compensate for struct sock fields having 'sk_' added
   46.31 -// to them in 2.6.
   46.32 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   46.33 -
   46.34 -#define SK_RECEIVE_QUEUE sk_receive_queue
   46.35 -#define SK_SLEEP         sk_sleep
   46.36 -
   46.37 -#else
   46.38 -
   46.39 -#define SK_RECEIVE_QUEUE receive_queue
   46.40 -#define SK_SLEEP         sleep
   46.41 -
   46.42 -#endif
   46.43 -
   46.44  /** @file
   46.45   * Support for the VARP udp sockets.
   46.46   */
   46.47  
   46.48 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
   46.49 +
   46.50 +/* Compensate for struct sock fields having 'sk_' added to them in 2.6. */
   46.51 +#define sk_receive_queue receive_queue
   46.52 +#define sk_sleep         sleep
   46.53 +
   46.54 +/* Defined here because sockfd_put() is inline in 'socket.c' on 2.4 (on 2.6 it is in net.h). */
   46.55 +#define sockfd_put(sock) fput((sock)->file)
   46.56 +
   46.57 +#endif
   46.58 +
   46.59  static inline mm_segment_t change_fs(mm_segment_t fs){
   46.60      mm_segment_t oldfs = get_fs();
   46.61      set_fs(fs);
   46.62      return oldfs;
   46.63  }
   46.64  
   46.65 +/** Define the fcntl() syscall. */
   46.66 +static inline _syscall3(int, fcntl,
   46.67 +                        unsigned int, fd, 
   46.68 +                        unsigned int, cmd,
   46.69 +                        unsigned long, arg)
   46.70 +
   46.71  /* Replicate the user-space socket API.
   46.72   * The parts we need anyway.
   46.73   */
   46.74 @@ -183,6 +188,7 @@ enum VsockFlag {
   46.75      VSOCK_CONNECT   =  4,
   46.76      VSOCK_BROADCAST =  8,
   46.77      VSOCK_MULTICAST = 16,
   46.78 +    VSOCK_NONBLOCK  = 32,
   46.79   };
   46.80  
   46.81  /** Convert socket flags to a string.
   46.82 @@ -191,33 +197,42 @@ enum VsockFlag {
   46.83   * @return static string
   46.84   */
   46.85  char * socket_flags(int flags){
   46.86 -    static char s[6];
   46.87 +    static char s[7];
   46.88      int i = 0;
   46.89      s[i++] = (flags & VSOCK_CONNECT   ? 'c' : '-');
   46.90      s[i++] = (flags & VSOCK_BIND      ? 'b' : '-');
   46.91      s[i++] = (flags & VSOCK_REUSE     ? 'r' : '-');
   46.92      s[i++] = (flags & VSOCK_BROADCAST ? 'B' : '-');
   46.93      s[i++] = (flags & VSOCK_MULTICAST ? 'M' : '-');
   46.94 +    s[i++] = (flags & VSOCK_NONBLOCK  ? 'N' : '-');
   46.95      s[i++] = '\0';
   46.96      return s;
   46.97  }
   46.98  
   46.99 +/** Control flag for whether varp should be running.
  46.100 + * If this is set to 0 then the varp thread will notice and
  46.101 + * (eventually) exit.
  46.102 + */
  46.103 +atomic_t varp_run = ATOMIC_INIT(0);
  46.104 +
  46.105 +enum {
  46.106 +    VARP_STATE_EXITED  = 2,
  46.107 +    VARP_STATE_RUNNING = 1,
  46.108 +    VARP_STATE_NONE    = 0,
  46.109 +    VARP_STATE_ERROR   = -1,
  46.110 +};
  46.111 +
  46.112 +/** State indicating whether the varp thread is running. */
  46.113 +atomic_t varp_state = ATOMIC_INIT(VARP_STATE_NONE);
  46.114 +
  46.115 +int varp_thread_err = 0;
  46.116 +
  46.117  /** The varp multicast socket. */
  46.118  int varp_mcast_sock = -1;
  46.119  
  46.120  /** The varp unicast socket. */
  46.121  int varp_ucast_sock = -1;
  46.122  
  46.123 -/** Control flag for whether varp should be running.
  46.124 - * If this is set 0 then the varp thread will notice and
  46.125 - * (eventually) exit. This is indicated by setting varp_running
  46.126 - * to 0.
  46.127 - */
  46.128 -atomic_t varp_run = ATOMIC_INIT(0);
  46.129 -
  46.130 -/** State flag indicating whether the varp thread is running. */
  46.131 -atomic_t varp_running = ATOMIC_INIT(0);
  46.132 -
  46.133  /** Set socket option to reuse address.
  46.134   *
  46.135   * @param sock socket
  46.136 @@ -274,7 +289,6 @@ int setsock_multicast(int sock, uint32_t
  46.137          goto exit;
  46.138      }
  46.139    exit:
  46.140 -    err = 0; //todo: remove hack
  46.141      return err;
  46.142  }
  46.143  
  46.144 @@ -305,12 +319,9 @@ int create_socket(int socktype, uint32_t
  46.145      struct sockaddr_in addr_in;
  46.146      struct sockaddr *addr = (struct sockaddr *)&addr_in;
  46.147      int addr_n = sizeof(addr_in);
  46.148 -    int reuse, bcast;
  46.149      int sockproto = 0;
  46.150  
  46.151      //dprintf(">\n");
  46.152 -    reuse = (flags & VSOCK_REUSE);
  46.153 -    bcast = (flags & VSOCK_BROADCAST);
  46.154      addr_in.sin_family      = AF_INET;
  46.155      addr_in.sin_addr.s_addr = saddr;
  46.156      addr_in.sin_port        = port;
  46.157 @@ -324,12 +335,12 @@ int create_socket(int socktype, uint32_t
  46.158      }
  46.159      sock = socket(AF_INET, socktype, sockproto);
  46.160      if(sock < 0) goto exit;
  46.161 -    if(reuse){
  46.162 -        err = setsock_reuse(sock, reuse);
  46.163 +    if(flags & VSOCK_REUSE){
  46.164 +        err = setsock_reuse(sock, 1);
  46.165          if(err < 0) goto exit;
  46.166      }
  46.167 -    if(bcast){
  46.168 -        err = setsock_broadcast(sock, bcast);
  46.169 +    if(flags & VSOCK_BROADCAST){
  46.170 +        err = setsock_broadcast(sock, 1);
  46.171          if(err < 0) goto exit;
  46.172      }
  46.173      if(flags & VSOCK_MULTICAST){
  46.174 @@ -344,6 +355,10 @@ int create_socket(int socktype, uint32_t
  46.175          err = bind(sock, addr, addr_n);
  46.176          if(err < 0) goto exit;
  46.177      }
  46.178 +    if(flags & VSOCK_NONBLOCK){
  46.179 +        err = fcntl(sock, F_SETFL, O_NONBLOCK);
  46.180 +        if(err < 0) goto exit;
  46.181 +    }
  46.182    exit:
  46.183      *val = (err ? -1 : sock);
  46.184      if(err) eprintf("> err=%d errno=%d\n", err, errno);
  46.185 @@ -360,7 +375,6 @@ int create_socket(int socktype, uint32_t
  46.186  int varp_mcast_open(uint32_t mcaddr, uint16_t port, int *val){
  46.187      int err = 0;
  46.188      int flags = VSOCK_REUSE;
  46.189 -    int multicast = MULTICAST(mcaddr);
  46.190      int sock = 0;
  46.191      
  46.192      dprintf(">\n");
  46.193 @@ -369,7 +383,7 @@ int varp_mcast_open(uint32_t mcaddr, uin
  46.194      
  46.195      err = create_socket(SOCK_DGRAM, mcaddr, port, flags, &sock);
  46.196      if(err < 0) goto exit;
  46.197 -    if(multicast){
  46.198 +    if(MULTICAST(mcaddr)){
  46.199          err = setsock_multicast_ttl(sock, 1);
  46.200          if(err < 0) goto exit;
  46.201      }
  46.202 @@ -398,47 +412,82 @@ int varp_ucast_open(uint32_t addr, u16 p
  46.203      return err;
  46.204  }
  46.205  
  46.206 -/* Here because inline in 'socket.c'. */
  46.207 -#ifndef sockfd_put
  46.208 -#define sockfd_put(sock) fput((sock)->file)
  46.209 -#endif
  46.210 +/**
  46.211 + * Return code > 0 means the handler owns the packet.
  46.212 + * Return code <= 0 means we still own it, with < 0 meaning
  46.213 + * an error.
  46.214 + */
  46.215 +static int handle_varp_skb(struct sk_buff *skb){
  46.216 +    static int count = 0;
  46.217 +    int err = 0;
  46.218 +    count++;
  46.219 +    switch(skb->pkt_type){
  46.220 +    case PACKET_BROADCAST:
  46.221 +    case PACKET_MULTICAST:
  46.222 +        vnet_forward_send(skb);
  46.223 +        /* Fall through. */
  46.224 +    case PACKET_HOST:
  46.225 +        err = varp_handle_message(skb);
  46.226 +        break;
  46.227 +    case PACKET_OTHERHOST:
  46.228 +        dprintf("> PACKET_OTHERHOST\n");
  46.229 +        break;
  46.230 +    case PACKET_OUTGOING:
  46.231 +        dprintf("> PACKET_OUTGOING\n");
  46.232 +        break;
  46.233 +    case PACKET_FASTROUTE:
  46.234 +        dprintf("> PACKET_FASTROUTE\n");
  46.235 +        break;
  46.236 +    case PACKET_LOOPBACK:
  46.237 +        // Outbound mcast/bcast are echoed with this type. Drop.
  46.238 +        dprintf("> LOOP src=" IPFMT " dst=" IPFMT " dev=%s\n",
  46.239 +                NIPQUAD(skb->nh.iph->saddr),
  46.240 +                NIPQUAD(skb->nh.iph->daddr),
  46.241 +                (skb->dev ? skb->dev->name : "??"));
  46.242 +      default:
  46.243 +        // Drop.
  46.244 +        break;
  46.245 +    }
  46.246 +    if(err <= 0){
  46.247 +        kfree_skb(skb);
  46.248 +    }
  46.249 +    return (err < 0 ? err : 0);
  46.250 +}
  46.251  
  46.252 -/** Get the next skb from a socket's receive queue.
  46.253 +/** Handle some skbs on a varp socket (if any).
  46.254   *
  46.255   * @param fd socket file descriptor
  46.256 - * @return skb or NULL
  46.257 + * @param n maximum number of skbs to handle
  46.258 + * @return number of skbs handled
  46.259   */
  46.260 -static struct sk_buff *get_sock_skb(int fd){
  46.261 +static int handle_varp_sock(int fd, int n){
  46.262 +    int ret = 0;
  46.263      int err = 0;
  46.264 -    struct sk_buff *skb = NULL;
  46.265 +    struct sk_buff *skb;
  46.266      struct socket *sock = NULL;
  46.267  
  46.268      sock = sockfd_lookup(fd, &err);
  46.269      if (!sock){
  46.270 -        dprintf("> no sock for fd=%d\n", fd);
  46.271 +        wprintf("> no sock for fd=%d\n", fd);
  46.272          goto exit;
  46.273      }
  46.274 -    skb = skb_dequeue(&sock->sk->SK_RECEIVE_QUEUE);
  46.275 -    //skb = skb_recv_datagram(sock->sk, 0, 1, &recv_err);
  46.276 +    for( ; ret < n; ret++){
  46.277 +        if(!sock->sk) break;
  46.278 +        skb = skb_dequeue(&sock->sk->sk_receive_queue);
  46.279 +        if(!skb) break;
  46.280 +        // Call the skb destructor so it isn't charged to the socket anymore.
  46.281 +        // An skb from a socket receive queue is charged to the socket
  46.282 +        // by skb_set_owner_r() until its destructor is called.
  46.283 +        // If the destructor is not called the socket will run out of
  46.284 +        // receive queue space and be unable to accept incoming skbs.
  46.285 +        // The destructor used is sock_rfree(), see 'include/net/sock.h'.
  46.286 +        // Other destructors: sock_wfree, sk_stream_rfree.
  46.287 +        skb_orphan(skb);
  46.288 +        handle_varp_skb(skb);
  46.289 +    }
  46.290      sockfd_put(sock);
  46.291    exit:
  46.292 -    return skb;
  46.293 -}
  46.294 -
  46.295 -/** Handle the next skb on a socket (if any).
  46.296 - *
  46.297 - * @param fd socket file descriptor
  46.298 - * @return 1 if there was an skb, 0 otherwise
  46.299 - */
  46.300 -static int handle_sock_skb(int fd){
  46.301 -    int ret = 0;
  46.302 -    struct sk_buff *skb = get_sock_skb(fd);
  46.303 -    if(skb){
  46.304 -        ret = 1;
  46.305 -        dprintf("> skb fd=%d skb=%p\n", fd, skb);
  46.306 -        varp_handle_message(skb);
  46.307 -        kfree_skb(skb);
  46.308 -    }
  46.309 +    dprintf("< ret=%d\n", ret);
  46.310      return ret;
  46.311  }
  46.312  
  46.313 @@ -449,16 +498,16 @@ static int handle_sock_skb(int fd){
  46.314   * @return 0 on success, error code otherwise
  46.315   */
  46.316  int sock_add_wait_queue(int fd, wait_queue_t *waitq){
  46.317 -    int err = 0;
  46.318 +    int err = -EINVAL;
  46.319      struct socket *sock = NULL;
  46.320  
  46.321 -    dprintf("> fd=%d\n", fd);
  46.322 +    if(fd < 0) goto exit;
  46.323      sock = sockfd_lookup(fd, &err);
  46.324      if (!sock) goto exit;
  46.325 -    add_wait_queue(sock->sk->SK_SLEEP, waitq);
  46.326 +    add_wait_queue(sock->sk->sk_sleep, waitq);
  46.327      sockfd_put(sock);
  46.328 +    err = 0;
  46.329    exit:
  46.330 -    dprintf("< err=%d\n", err);
  46.331      return err;
  46.332  }
  46.333  
  46.334 @@ -469,17 +518,94 @@ int sock_add_wait_queue(int fd, wait_que
  46.335   * @return 0 on success, error code otherwise
  46.336   */
  46.337  int sock_remove_wait_queue(int fd, wait_queue_t *waitq){
  46.338 -    int err = 0;
  46.339 +    int err = -EINVAL;
  46.340      struct socket *sock = NULL;
  46.341  
  46.342 +    if(fd < 0) goto exit;
  46.343      sock = sockfd_lookup(fd, &err);
  46.344      if (!sock) goto exit;
  46.345 -    remove_wait_queue(sock->sk->SK_SLEEP, waitq);
  46.346 +    remove_wait_queue(sock->sk->sk_sleep, waitq);
  46.347      sockfd_put(sock);
  46.348 +    err = 0;
  46.349    exit:
  46.350      return err;
  46.351  }
  46.352  
  46.353 +#if 0
  46.354 +// Default data ready function on a socket.
  46.355 +static void sock_def_readable(struct sock *sk, int len)
  46.356 +{
  46.357 +	read_lock(&sk->sk_callback_lock);
  46.358 +	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
  46.359 +		wake_up_interruptible(sk->sk_sleep);
  46.360 +	sk_wake_async(sk,1,POLL_IN);
  46.361 +	read_unlock(&sk->sk_callback_lock);
  46.362 +}
  46.363 +#endif
  46.364 +
  46.365 +static void sock_data_ready(struct sock *sk, int len){
  46.366 +    struct sk_buff *skb;
  46.367 +    //read_lock(&sk->sk_callback_lock);
  46.368 +    skb = skb_dequeue(&sk->sk_receive_queue);
  46.369 +    if(skb){
  46.370 +        skb_orphan(skb);
  46.371 +    }
  46.372 +    //read_unlock(&sk->sk_callback_lock);
  46.373 +    if(skb){
  46.374 +        handle_varp_skb(skb);
  46.375 +    }
  46.376 +}
  46.377 +
  46.378 +/** Set the data ready callback on a socket.
  46.379 + */
  46.380 +int sock_set_callback(int fd){
  46.381 +    int err = -EINVAL;
  46.382 +    struct socket *sock = NULL;
  46.383 +
  46.384 +    if(fd < 0) goto exit;
  46.385 +    sock = sockfd_lookup(fd, &err);
  46.386 +    if (!sock) goto exit;
  46.387 +    sock->sk->sk_data_ready = sock_data_ready;
  46.388 +    sockfd_put(sock);
  46.389 +    err = 0;
  46.390 +  exit:
  46.391 +    return err;
  46.392 +}
  46.393 +
  46.394 +/** Open the sockets. */
  46.395 +int varp_sockets_open(u32 mcaddr, u16 port){
  46.396 +    int err = 0;
  46.397 +    mm_segment_t oldfs;
  46.398 +
  46.399 +    dprintf("> mcaddr=%u.%u.%u.%u port=%u\n", NIPQUAD(mcaddr), ntohs(port));
  46.400 +    oldfs = change_fs(KERNEL_DS);
  46.401 +    err = varp_mcast_open(mcaddr, port, &varp_mcast_sock);
  46.402 +    if(err < 0 ) goto exit;
  46.403 +    err = varp_ucast_open(INADDR_ANY, port, &varp_ucast_sock);
  46.404 +    if(err < 0 ) goto exit;
  46.405 +    sock_set_callback(varp_ucast_sock);
  46.406 +    sock_set_callback(varp_mcast_sock);
  46.407 +  exit:
  46.408 +    set_fs(oldfs);
  46.409 +    dprintf("< err=%d\n", err);
  46.410 +    return err;
  46.411 +}	
  46.412 +
  46.413 +/** Close the sockets. */
  46.414 +void varp_sockets_close(void){
  46.415 +    mm_segment_t oldfs;
  46.416 +    oldfs = change_fs(KERNEL_DS);
  46.417 +    if(varp_mcast_sock >= 0){
  46.418 +        shutdown(varp_mcast_sock, 2);
  46.419 +        varp_mcast_sock = -1;
  46.420 +    }
  46.421 +    if(varp_ucast_sock >= 0){
  46.422 +        shutdown(varp_ucast_sock, 2);
  46.423 +        varp_ucast_sock = -1;
  46.424 +    }
  46.425 +    set_fs(oldfs);
  46.426 +}
  46.427 +
  46.428  /** Loop handling the varp sockets.
  46.429   * We use kernel API for this (waitqueue, schedule_timeout) instead
  46.430   * of select because the select syscall was returning EFAULT. Oh well.
  46.431 @@ -489,82 +615,57 @@ int sock_remove_wait_queue(int fd, wait_
  46.432   */
  46.433  int varp_main(void *arg){
  46.434      int err = 0;
  46.435 -    long timeout = 3 * HZ;
  46.436 +    long timeout = 1 * HZ;
  46.437      int count = 0;
  46.438 -    int n = 0;
  46.439      DECLARE_WAITQUEUE(mcast_wait, current);
  46.440      DECLARE_WAITQUEUE(ucast_wait, current);
  46.441  
  46.442      dprintf("> start\n");
  46.443 -    atomic_set(&varp_running, 1);
  46.444 +    snprintf(current->comm, sizeof(current->comm), "varp_main");
  46.445 +
  46.446      err = sock_add_wait_queue(varp_mcast_sock, &mcast_wait);
  46.447 +    if(err) goto exit_mcast_sock;
  46.448      err = sock_add_wait_queue(varp_ucast_sock, &ucast_wait);
  46.449 -    for(n = 1; atomic_read(&varp_run) == 1; n++){
  46.450 +    if(err) goto exit_ucast_sock;
  46.451 +    atomic_set(&varp_state, VARP_STATE_RUNNING);
  46.452 +    for( ; atomic_read(&varp_run); ){
  46.453          count = 0;
  46.454 -        count += handle_sock_skb(varp_mcast_sock);
  46.455 -        count += handle_sock_skb(varp_ucast_sock);
  46.456 +        count += handle_varp_sock(varp_mcast_sock, 1);
  46.457 +        count += handle_varp_sock(varp_ucast_sock, 16);
  46.458          if(!count){
  46.459 -            // No skbs were handled, so go back to sleep.
  46.460 +            if(!atomic_read(&varp_run)) break;
  46.461 +            // No skbs were handled, go to sleep.
  46.462              set_current_state(TASK_INTERRUPTIBLE);
  46.463              schedule_timeout(timeout);
  46.464 -            current->state = TASK_RUNNING;
  46.465 +            __set_current_state(TASK_RUNNING);
  46.466          }
  46.467      }
  46.468 +  exit_ucast_sock:
  46.469 +    sock_remove_wait_queue(varp_ucast_sock, &ucast_wait);
  46.470 +  exit_mcast_sock:
  46.471      sock_remove_wait_queue(varp_mcast_sock, &mcast_wait);
  46.472 -    sock_remove_wait_queue(varp_ucast_sock, &ucast_wait);
  46.473 -    atomic_set(&varp_running, 0);
  46.474 +    varp_sockets_close();
  46.475 +    if(err){
  46.476 +        eprintf("%s< err=%d\n", __FUNCTION__, err);
  46.477 +    }
  46.478 +    varp_thread_err = err;
  46.479 +    atomic_set(&varp_state, VARP_STATE_EXITED);
  46.480      //MOD_DEC_USE_COUNT;
  46.481 -    dprintf("< stop err=%d\n", err);
  46.482 -    return err;
  46.483 -}
  46.484 -
  46.485 -/** Start the varp thread.
  46.486 - *
  46.487 - * @return 0 on success, error code otherwise
  46.488 - */
  46.489 -int varp_start(void){
  46.490 -    int err = 0;
  46.491 -    void *args = NULL;
  46.492 -    int flags = 0;
  46.493 -    long pid = 0;
  46.494 -    
  46.495 -    dprintf(">\n");
  46.496 -    //flags |= CLONE_VM;
  46.497 -    flags |= CLONE_FS;
  46.498 -    flags |= CLONE_FILES;
  46.499 -    flags |= CLONE_SIGHAND;
  46.500 -    atomic_set(&varp_run, 1);
  46.501 -    atomic_set(&varp_running, 0);
  46.502 -    pid = kernel_thread(varp_main, args, flags);
  46.503 -    dprintf("< pid=%ld\n", pid);
  46.504      return err;
  46.505  }
  46.506  
  46.507  /** Close the varp sockets and stop the thread handling them.
  46.508   */
  46.509  void varp_close(void){
  46.510 -    mm_segment_t oldfs;
  46.511 -    long timeout = 1 * HZ;
  46.512      int tries = 10;
  46.513      dprintf(">\n");
  46.514      // Tell the varp thread to stop and wait a while for it.
  46.515      atomic_set(&varp_run, 0);
  46.516 -    while(atomic_read(&varp_running) && tries-- > 0){
  46.517 +    while(atomic_read(&varp_state) == VARP_STATE_RUNNING && tries-- > 0){
  46.518          set_current_state(TASK_INTERRUPTIBLE);
  46.519 -        schedule_timeout(timeout);
  46.520 -        current->state = TASK_RUNNING;
  46.521 +        schedule_timeout(HZ / 2);
  46.522 +        __set_current_state(TASK_RUNNING);
  46.523      }
  46.524 -    // Close the sockets.
  46.525 -    oldfs = change_fs(KERNEL_DS);
  46.526 -    if(varp_mcast_sock > 0){
  46.527 -        shutdown(varp_mcast_sock, 2);
  46.528 -        varp_mcast_sock = -1;
  46.529 -    }
  46.530 -    if(varp_ucast_sock > 0){
  46.531 -        shutdown(varp_ucast_sock, 2);
  46.532 -        varp_ucast_sock = -1;
  46.533 -    }
  46.534 -    set_fs(oldfs);
  46.535      //MOD_DEC_USE_COUNT;
  46.536      dprintf("<\n");
  46.537  }    
  46.538 @@ -577,24 +678,25 @@ void varp_close(void){
  46.539   */
  46.540  int varp_open(u32 mcaddr, u16 port){
  46.541      int err = 0;
  46.542 -    mm_segment_t oldfs;
  46.543 -
  46.544 +    
  46.545      //MOD_INC_USE_COUNT;
  46.546 -    dprintf("> mcaddr=%u.%u.%u.%u port=%u\n",
  46.547 -            NIPQUAD(mcaddr), ntohs(port));
  46.548 -    oldfs = change_fs(KERNEL_DS);
  46.549 -    err = varp_mcast_open(mcaddr, port, &varp_mcast_sock);
  46.550 -    if(err < 0 ) goto exit;
  46.551 -    err = varp_ucast_open(INADDR_ANY, port, &varp_ucast_sock);
  46.552 -    if(err < 0 ) goto exit;
  46.553 -    set_fs(oldfs);
  46.554 -    err = varp_start();
  46.555 +    dprintf(">\n");
  46.556 +    err = varp_sockets_open(mcaddr, port);
  46.557 +    if(err) goto exit;
  46.558 +    atomic_set(&varp_run, 1);
  46.559 +    atomic_set(&varp_state, VARP_STATE_NONE);
  46.560 +    kernel_thread(varp_main, NULL, (CLONE_FS | CLONE_FILES | CLONE_SIGHAND));
  46.561 +#if 0
  46.562 +    while(atomic_read(&varp_state) == VARP_STATE_NONE){
  46.563 +        set_current_state(TASK_INTERRUPTIBLE);
  46.564 +        schedule_timeout(1 * HZ);
  46.565 +        __set_current_state(TASK_RUNNING);
  46.566 +    }
  46.567 +    err = varp_thread_err;
  46.568 +#endif
  46.569    exit:
  46.570 -    set_fs(oldfs);
  46.571      if(err){
  46.572 -        varp_close();
  46.573 +        wprintf("> err=%d\n", err);
  46.574      }
  46.575 -    dprintf("< err=%d\n", err);
  46.576      return err;
  46.577 -}	
  46.578 -
  46.579 +}
    47.1 --- a/tools/vnet/vnet-module/varp_util.c	Thu Feb 09 16:09:00 2006 +0100
    47.2 +++ b/tools/vnet/vnet-module/varp_util.c	Thu Feb 09 16:12:11 2006 +0100
    47.3 @@ -47,7 +47,7 @@ int VnetId_aton(const char *s, VnetId *v
    47.4      char buf[5];
    47.5      int buf_n = sizeof(buf) - 1;
    47.6      int i, n;
    47.7 -    const int elts_n = 8;
    47.8 +    const int elts_n = VNETID_SIZE16;
    47.9  
   47.10      q = s;
   47.11      p = strchr(q, ':');
    48.1 --- a/tools/vnet/vnet-module/varp_util.h	Thu Feb 09 16:09:00 2006 +0100
    48.2 +++ b/tools/vnet/vnet-module/varp_util.h	Thu Feb 09 16:12:11 2006 +0100
    48.3 @@ -1,5 +1,5 @@
    48.4  /*
    48.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    48.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    48.7   *
    48.8   * This program is free software; you can redistribute it and/or modify
    48.9   * it under the terms of the GNU General Public License as published by the 
   48.10 @@ -84,59 +84,12 @@ extern int VnetId_aton(const char *s, Vn
   48.11   */
   48.12  static inline struct VnetId toVnetId(uint32_t vnetid){
   48.13      struct VnetId vnet = {};
   48.14 -    vnet.u.vnet32[3] = htonl(vnetid);
   48.15 +    vnet.u.vnet32[VNETID_SIZE32 - 1] = htonl(vnetid);
   48.16      return vnet;
   48.17  }
   48.18  
   48.19 -static inline uint32_t VnetId_hash(uint32_t h, VnetId *vnet)
   48.20 -{
   48.21 -    h = hash_hul(h, vnet->u.vnet32[0]);
   48.22 -    h = hash_hul(h, vnet->u.vnet32[1]);
   48.23 -    h = hash_hul(h, vnet->u.vnet32[2]);
   48.24 -    h = hash_hul(h, vnet->u.vnet32[3]);
   48.25 -    return h;
   48.26 -}
   48.27 -
   48.28 -static inline int VnetId_eq(VnetId *vnet1, VnetId *vnet2)
   48.29 -{
   48.30 -    return memcmp(vnet1, vnet2, sizeof(VnetId)) == 0;
   48.31 -}
   48.32 -
   48.33 -static inline uint32_t VarpAddr_hash(uint32_t h, VarpAddr *addr)
   48.34 -{
   48.35 -    h = hash_hul(h, addr->family);
   48.36 -    if(addr->family == AF_INET6){
   48.37 -        h = hash_hul(h, addr->u.ip6.s6_addr32[0]);
   48.38 -        h = hash_hul(h, addr->u.ip6.s6_addr32[1]);
   48.39 -        h = hash_hul(h, addr->u.ip6.s6_addr32[2]);
   48.40 -        h = hash_hul(h, addr->u.ip6.s6_addr32[3]);
   48.41 -    } else {
   48.42 -        h = hash_hul(h, addr->u.ip4.s_addr);
   48.43 -    }
   48.44 -    return h;
   48.45 -}
   48.46 -
   48.47 -static inline int VarpAddr_eq(VarpAddr *addr1, VarpAddr*addr2)
   48.48 -{
   48.49 -    return memcmp(addr1, addr2, sizeof(VarpAddr)) == 0;
   48.50 -}
   48.51 -
   48.52 -static inline uint32_t Vmac_hash(uint32_t h, Vmac *vmac)
   48.53 -{
   48.54 -    h = hash_hul(h,
   48.55 -                 (vmac->mac[0] << 24) |
   48.56 -                 (vmac->mac[1] << 16) |
   48.57 -                 (vmac->mac[2] <<  8) |
   48.58 -                 (vmac->mac[3]      ));
   48.59 -    h = hash_hul(h, 
   48.60 -                 (vmac->mac[4] <<   8) |
   48.61 -                 (vmac->mac[5]       ));
   48.62 -    return h;
   48.63 -}
   48.64 -
   48.65 -static inline int Vmac_eq(Vmac *vmac1, Vmac *vmac2)
   48.66 -{
   48.67 -    return memcmp(vmac1, vmac2, sizeof(Vmac)) == 0;
   48.68 +static inline int VnetId_eq(VnetId *id1, VnetId *id2){
   48.69 +    return memcmp(id1, id2, sizeof(VnetId)) == 0;
   48.70  }
   48.71  
   48.72  #endif /* _VNET_VARP_UTIL_H */
    49.1 --- a/tools/vnet/vnet-module/vif.c	Thu Feb 09 16:09:00 2006 +0100
    49.2 +++ b/tools/vnet/vnet-module/vif.c	Thu Feb 09 16:12:11 2006 +0100
    49.3 @@ -1,5 +1,5 @@
    49.4  /*
    49.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    49.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    49.7   *
    49.8   * This program is free software; you can redistribute it and/or modify
    49.9   * it under the terms of the GNU General Public License as published by the 
   49.10 @@ -17,61 +17,78 @@
   49.11   *
   49.12   */
   49.13  
   49.14 +#ifdef __KERNEL__
   49.15 +
   49.16  #include <linux/config.h>
   49.17  #include <linux/kernel.h>
   49.18  #include <linux/module.h>
   49.19 -#include <linux/init.h>
   49.20 -#include <linux/string.h>
   49.21 -#include <linux/version.h>
   49.22 -
   49.23 -#include <linux/net.h>
   49.24 -#include <linux/in.h>
   49.25 -#include <linux/inet.h>
   49.26 -#include <linux/netdevice.h>
   49.27 -#include <linux/udp.h>
   49.28 -
   49.29 -#include <net/ip.h>
   49.30 -#include <net/protocol.h>
   49.31 -#include <net/route.h>
   49.32 -#include <linux/skbuff.h>
   49.33  #include <linux/spinlock.h>
   49.34  
   49.35 -#include <etherip.h>
   49.36 -#include <if_varp.h>
   49.37 -#include <vnet_dev.h>
   49.38 +#else
   49.39 +
   49.40 +#include "sys_kernel.h"
   49.41 +#include "spinlock.h"
   49.42 +#include "skbuff.h"
   49.43 +
   49.44 +#endif
   49.45 +
   49.46  #include <vif.h>
   49.47  #include <varp.h>
   49.48 +#include <varp_util.h>
   49.49  
   49.50  #include "allocate.h"
   49.51 +#include "iostream.h"
   49.52  #include "hash_table.h"
   49.53 -#include "sys_net.h"
   49.54 -#include "sys_string.h"
   49.55 +#include "timer_util.h"
   49.56  
   49.57  #define MODULE_NAME "VNET"
   49.58  #define DEBUG 1
   49.59  #undef DEBUG
   49.60  #include "debug.h"
   49.61  
   49.62 +/** Vif table ttl - interval between sweeps of old vifs. */
   49.63 +#define VIF_TABLE_TTL (60*HZ)
   49.64 +
   49.65 +/** Vif entry ttl - a vif entry older than this is removed. */
   49.66 +#define VIF_ENTRY_TTL (60*HZ)
   49.67 +
   49.68  /** Table of vifs indexed by VifKey. */
   49.69  HashTable *vif_table = NULL;
   49.70  rwlock_t vif_table_lock = RW_LOCK_UNLOCKED;
   49.71 +struct timer_list vif_table_timer = {};
   49.72 +int vif_table_sweeps = 0;
   49.73  
   49.74  #define vif_read_lock(flags)    read_lock_irqsave(&vif_table_lock, (flags))
   49.75  #define vif_read_unlock(flags)  read_unlock_irqrestore(&vif_table_lock, (flags))
   49.76  #define vif_write_lock(flags)   write_lock_irqsave(&vif_table_lock, (flags))
   49.77  #define vif_write_unlock(flags) write_unlock_irqrestore(&vif_table_lock, (flags))
   49.78  
   49.79 -void vif_print(void){
   49.80 +void vif_entry_print(Vif *vif, IOStream *io){
   49.81 +    char vnetbuf[VNET_ID_BUF];
   49.82 +    unsigned long now = jiffies;
   49.83 +
   49.84 +    IOStream_print(io, "(vif\n");
   49.85 +    IOStream_print(io, " (vnet %s)\n", VnetId_ntoa(&vif->vnet, vnetbuf));
   49.86 +    IOStream_print(io, " (vmac " MACFMT ")\n", MAC6TUPLE(vif->vmac.mac));
   49.87 +    IOStream_print(io, " (age %u)\n", now - vif->timestamp);
   49.88 +    IOStream_print(io, ")\n");
   49.89 +}
   49.90 +
   49.91 +void vif_print(IOStream *io){
   49.92      HashTable_for_decl(entry);
   49.93      Vif *vif;
   49.94      unsigned long flags;
   49.95 -    char vnetbuf[VNET_ID_BUF];
   49.96  
   49.97      vif_read_lock(flags);
   49.98 +    IOStream_print(io, "(viftable\n");
   49.99 +    IOStream_print(io, " (table_ttl %u)\n", VIF_TABLE_TTL);
  49.100 +    IOStream_print(io, " (entry_ttl %u)\n", VIF_ENTRY_TTL);
  49.101 +    IOStream_print(io, " (sweeps %d)\n", vif_table_sweeps);
  49.102 +    IOStream_print(io, ")\n");
  49.103 +    
  49.104      HashTable_for_each(entry, vif_table){
  49.105          vif = entry->value;
  49.106 -        printk(KERN_INFO "VIF(vnet=%s vmac=" MACFMT ")\n",
  49.107 -               VnetId_ntoa(&vif->vnet, vnetbuf), MAC6TUPLE(vif->vmac.mac));
  49.108 +        vif_entry_print(vif, io);
  49.109      }
  49.110      vif_read_unlock(flags);
  49.111  }
  49.112 @@ -94,12 +111,8 @@ void vif_incref(Vif *vif){
  49.113   * @param k key (VifKey)
  49.114   * @return hashcode
  49.115   */
  49.116 -Hashcode vif_key_hash_fn(void *k){
  49.117 -    VifKey *key = k;
  49.118 -    Hashcode h = 0;
  49.119 -    h = VnetId_hash(h, &key->vnet);
  49.120 -    h = Vmac_hash(h, &key->vmac);
  49.121 -    return h;
  49.122 +static Hashcode vif_key_hash_fn(void *k){
  49.123 +    return hash_hvoid(0, k, sizeof(VifKey));
  49.124  }
  49.125  
  49.126  /** Test equality for keys in the vif table.
  49.127 @@ -109,11 +122,8 @@ Hashcode vif_key_hash_fn(void *k){
  49.128   * @param k2 key to compare (VifKey)
  49.129   * @return 1 if equal, 0 otherwise
  49.130   */
  49.131 -int vif_key_equal_fn(void *k1, void *k2){
  49.132 -    VifKey *key1 = k1;
  49.133 -    VifKey *key2 = k2;
  49.134 -    return (VnetId_eq(&key1->vnet , &key2->vnet) &&
  49.135 -            Vmac_eq(&key1->vmac, &key2->vmac));
  49.136 +static int vif_key_equal_fn(void *k1, void *k2){
  49.137 +    return memcmp(k1, k2, sizeof(VifKey)) == 0;
  49.138  }
  49.139  
  49.140  /** Free an entry in the vif table.
  49.141 @@ -132,18 +142,17 @@ static void vif_entry_free_fn(HashTable 
  49.142  }
  49.143  
  49.144  /** Lookup a vif.
  49.145 + * Caller must hold vif lock.
  49.146   *
  49.147   * @param vnet vnet id
  49.148   * @param mac MAC address
  49.149   * @return 0 on success, -ENOENT otherwise
  49.150   */
  49.151 -int vif_lookup(VnetId *vnet, Vmac *vmac, Vif **vif){
  49.152 +static int _vif_lookup(VnetId *vnet, Vmac *vmac, Vif **vif){
  49.153      int err = 0;
  49.154      VifKey key = { .vnet = *vnet, .vmac = *vmac };
  49.155      HTEntry *entry = NULL;
  49.156 -    unsigned long flags;
  49.157      
  49.158 -    vif_read_lock(flags);
  49.159      entry = HashTable_get_entry(vif_table, &key);
  49.160      if(entry){
  49.161          *vif = entry->value;
  49.162 @@ -152,23 +161,39 @@ int vif_lookup(VnetId *vnet, Vmac *vmac,
  49.163          *vif = NULL;
  49.164          err = -ENOENT;
  49.165      }
  49.166 +    return err;
  49.167 +}
  49.168 +
  49.169 +/** Lookup a vif.
  49.170 + *
  49.171 + * @param vnet vnet id
  49.172 + * @param mac MAC address
  49.173 + * @return 0 on success, -ENOENT otherwise
  49.174 + */
  49.175 +int vif_lookup(VnetId *vnet, Vmac *vmac, Vif **vif){
  49.176 +    unsigned long flags;    
  49.177 +    int err;
  49.178 +
  49.179 +    vif_read_lock(flags);
  49.180 +    err = _vif_lookup(vnet, vmac, vif);
  49.181      vif_read_unlock(flags);
  49.182      return err;
  49.183  }
  49.184  
  49.185  /** Create a new vif.
  49.186 + * Entry must not exist.
  49.187 + * Caller must hold vif lock.
  49.188   *
  49.189   * @param vnet vnet id
  49.190   * @param mac MAC address
  49.191   * @return 0 on success, negative error code otherwise
  49.192   */
  49.193 -int vif_add(VnetId *vnet, Vmac *vmac, Vif **val){
  49.194 +static int _vif_add(VnetId *vnet, Vmac *vmac, Vif **val){
  49.195      int err = 0;
  49.196      Vif *vif = NULL;
  49.197      HTEntry *entry;
  49.198 -    unsigned long flags;
  49.199 +    unsigned long now = jiffies;
  49.200  
  49.201 -    dprintf("> vnet=%d\n", vnet);
  49.202      vif = ALLOCATE(Vif);
  49.203      if(!vif){
  49.204          err = -ENOMEM;
  49.205 @@ -177,9 +202,8 @@ int vif_add(VnetId *vnet, Vmac *vmac, Vi
  49.206      atomic_set(&vif->refcount, 1);
  49.207      vif->vnet = *vnet;
  49.208      vif->vmac = *vmac;
  49.209 -    vif_write_lock(flags);
  49.210 +    vif->timestamp = now;
  49.211      entry = HashTable_add(vif_table, vif, vif);
  49.212 -    vif_write_unlock(flags);
  49.213      if(!entry){
  49.214          err = -ENOMEM;
  49.215          deallocate(vif);
  49.216 @@ -189,15 +213,13 @@ int vif_add(VnetId *vnet, Vmac *vmac, Vi
  49.217      vif_incref(vif);
  49.218    exit:
  49.219      *val = (err ? NULL : vif);
  49.220 -    dprintf("< err=%d\n", err);
  49.221      return err;
  49.222  }
  49.223  
  49.224 -/** Delete an entry.
  49.225 +/** Delete a vif entry.
  49.226   *
  49.227   * @param vnet vnet id
  49.228   * @param mac MAC address
  49.229 - * @param coaddr return parameter for care-of address
  49.230   * @return number of entries deleted, or negative error code
  49.231   */
  49.232  int vif_remove(VnetId *vnet, Vmac *vmac){
  49.233 @@ -211,35 +233,133 @@ int vif_remove(VnetId *vnet, Vmac *vmac)
  49.234      return err;
  49.235  }
  49.236  
  49.237 -void vif_purge(void){
  49.238 -    HashTable_clear(vif_table);
  49.239 +/** Delete all vifs on a vnet.
  49.240 + *
  49.241 + * @param vnet vnet id
  49.242 + * @return number of entries deleted
  49.243 + */
  49.244 +int vif_remove_vnet(VnetId *vnet){
  49.245 +    int count = 0;
  49.246 +    unsigned long flags;
  49.247 +    HashTable_for_decl(entry);
  49.248 +
  49.249 +    
  49.250 +    vif_write_lock(flags);
  49.251 +    HashTable_for_each(entry, vif_table){
  49.252 +        Vif *vif = entry->value;
  49.253 +        if(VnetId_eq(&vif->vnet, vnet)){
  49.254 +            count += HashTable_remove(vif_table, vif);
  49.255 +        }
  49.256 +    }
  49.257 +    vif_write_unlock(flags);
  49.258 +    return count;
  49.259  }
  49.260  
  49.261 -int vif_create(VnetId *vnet, Vmac *vmac, Vif **vif){
  49.262 +/** Purge the vif table.
  49.263 + */
  49.264 +void vif_purge(void){
  49.265 +    unsigned long flags;
  49.266 +    vif_write_lock(flags);
  49.267 +    HashTable_clear(vif_table);
  49.268 +    vif_write_unlock(flags);
  49.269 +}
  49.270 +
  49.271 +/** Sweep old vif entries from the vif table.
  49.272 + */
  49.273 +void vif_sweep(void){
  49.274 +    HashTable_for_decl(entry);
  49.275 +    Vif *vif;
  49.276 +    int vif_count = 0;
  49.277 +    unsigned long now = jiffies;
  49.278 +    unsigned long old = VIF_ENTRY_TTL;
  49.279 +    unsigned long flags;
  49.280 +
  49.281 +    vif_write_lock(flags);
  49.282 +    vif_table_sweeps++;
  49.283 +    HashTable_for_each(entry, vif_table){
  49.284 +        vif = entry->value;
  49.285 +        vif_count++;
  49.286 +        if(!(vif->flags & VIF_FLAG_PERSISTENT)
  49.287 +           && (now - vif->timestamp > old)){
  49.288 +            iprintf("> Sweeping:\n");
  49.289 +            vif_entry_print(vif, iostdout);
  49.290 +            HashTable_remove(vif_table, entry->key);
  49.291 +        }
  49.292 +    }
  49.293 +    vif_write_unlock(flags);
  49.294 +}
  49.295 +
  49.296 +/** Create a new vif if it does not exist.
  49.297 + * Caller must hold vif lock.
  49.298 + *
  49.299 + * @param vnet vnet id
  49.300 + * @param mac MAC address
  49.301 + * @return 0 on success, negative error code otherwise
  49.302 + */
  49.303 +int _vif_create(VnetId *vnet, Vmac *vmac, Vif **vif){
  49.304      int err = 0;
  49.305  
  49.306 -    dprintf(">\n");
  49.307 -    if(vif_lookup(vnet, vmac, vif) == 0){
  49.308 -        vif_decref(*vif);
  49.309 -        err = -EEXIST;
  49.310 +    if(_vif_lookup(vnet, vmac, vif) == 0){
  49.311          goto exit;
  49.312      }
  49.313 -    err = vif_add(vnet, vmac, vif);
  49.314 +    err = _vif_add(vnet, vmac, vif);
  49.315    exit:
  49.316 -    if(err){
  49.317 -        *vif = NULL;
  49.318 -    }
  49.319 -    dprintf("< err=%d\n", err);
  49.320      return err;
  49.321  }
  49.322  
  49.323 +/** Create a new vif if it does not exist.
  49.324 + *
  49.325 + * @param vnet vnet id
  49.326 + * @param mac MAC address
  49.327 + * @return 0 on success, negative error code otherwise
  49.328 + */
  49.329 +int vif_create(VnetId *vnet, Vmac *vmac, int vflags, Vif **vif){
  49.330 +    int err = 0;
  49.331 +    unsigned long flags;
  49.332 +
  49.333 +    vif_write_lock(flags);
  49.334 +    err = _vif_create(vnet, vmac, vif);
  49.335 +    if(!err && *vif){
  49.336 +        (*vif)->flags = vflags;
  49.337 +    }
  49.338 +    vif_write_unlock(flags);
  49.339 +    return err;
  49.340 +}
  49.341 +
  49.342 +/** Update the timestamp for a vif.
  49.343 + *
  49.344 + * @param vnet vnet id
  49.345 + * @param mac MAC address
  49.346 + * @return 0 on success, negative error code otherwise
  49.347 + */
  49.348 +int vif_update(VnetId *vnet, Vmac *vmac){
  49.349 +    Vif *vif = NULL;
  49.350 +    int err = 0;
  49.351 +    unsigned long now = jiffies;
  49.352 +    unsigned long flags;
  49.353 +
  49.354 +    vif_write_lock(flags);
  49.355 +    err = _vif_create(vnet, vmac, &vif);
  49.356 +    if(err) goto exit;
  49.357 +    vif->timestamp = now;
  49.358 +    vif_decref(vif);
  49.359 +  exit:
  49.360 +    vif_write_unlock(flags);
  49.361 +    return err;
  49.362 +}
  49.363 +
  49.364 +static void vif_table_timer_fn(unsigned long arg){
  49.365 +    if(!vif_table) return;
  49.366 +    vif_sweep();
  49.367 +    timer_set(&vif_table_timer, VIF_TABLE_TTL);
  49.368 +}
  49.369 +    
  49.370  /** Initialize the vif table.
  49.371   *
  49.372   * @return 0 on success, error code otherwise
  49.373   */
  49.374  int vif_init(void){
  49.375      int err = 0;
  49.376 -    dprintf(">\n");
  49.377      vif_table = HashTable_new(0);
  49.378      if(!vif_table){
  49.379          err = -ENOMEM;
  49.380 @@ -249,12 +369,18 @@ int vif_init(void){
  49.381      vif_table->key_hash_fn   = vif_key_hash_fn;
  49.382      vif_table->key_equal_fn  = vif_key_equal_fn;
  49.383  
  49.384 +    timer_init(&vif_table_timer, vif_table_timer_fn, 0);
  49.385 +    timer_set(&vif_table_timer, VIF_TABLE_TTL);
  49.386 +
  49.387    exit:
  49.388 -    if(err < 0) wprintf("< err=%d\n", err);
  49.389 -    dprintf("< err=%d\n", err);
  49.390 +    if(err < 0){
  49.391 +        eprintf("> vif_init err=%d\n", err);
  49.392 +    }
  49.393      return err;
  49.394  }
  49.395  
  49.396  void vif_exit(void){
  49.397 +    timer_cancel(&vif_table_timer);
  49.398      HashTable_free(vif_table);
  49.399 +    vif_table = NULL;
  49.400  }
    50.1 --- a/tools/vnet/vnet-module/vif.h	Thu Feb 09 16:09:00 2006 +0100
    50.2 +++ b/tools/vnet/vnet-module/vif.h	Thu Feb 09 16:12:11 2006 +0100
    50.3 @@ -1,5 +1,5 @@
    50.4  /*
    50.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    50.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    50.7   *
    50.8   * This program is free software; you can redistribute it and/or modify
    50.9   * it under the terms of the GNU General Public License as published by the 
   50.10 @@ -19,37 +19,44 @@
   50.11  #ifndef _VNET_VIF_H_
   50.12  #define _VNET_VIF_H_
   50.13  
   50.14 +#ifdef __KERNEL__
   50.15 +#include <asm/atomic.h>
   50.16 +#else
   50.17 +#include "spinlock.h"
   50.18 +#endif
   50.19 +
   50.20  #include <if_varp.h>
   50.21 -struct net_device;
   50.22 +struct IOStream;
   50.23  
   50.24  /** Key for entries in the vif table. */
   50.25  typedef struct VifKey {
   50.26 -    VnetId vnet;
   50.27 -    Vmac vmac;
   50.28 +    struct VnetId vnet;
   50.29 +    struct Vmac vmac;
   50.30  } VifKey;
   50.31  
   50.32  typedef struct Vif {
   50.33 -    VnetId vnet;
   50.34 -    Vmac vmac;
   50.35 -    struct net_device *dev;
   50.36 +    struct VnetId vnet;
   50.37 +    struct Vmac vmac;
   50.38      atomic_t refcount;
   50.39 +    unsigned long timestamp;
   50.40 +    int flags;
   50.41  } Vif;
   50.42  
   50.43 -struct HashTable;
   50.44 -extern struct HashTable *vif_table;
   50.45 -
   50.46 -extern void vif_print(void);
   50.47 +enum {
   50.48 +    VIF_FLAG_PERSISTENT = 1,
   50.49 +};
   50.50  
   50.51 -extern void vif_decref(Vif *vif);
   50.52 -extern void vif_incref(Vif *vif);
   50.53 -
   50.54 -extern int vif_create(struct VnetId *vnet, Vmac *vmac, Vif **vif);
   50.55 +extern void vif_print(struct IOStream *io);
   50.56  
   50.57 -extern int vif_create(VnetId *vnet, Vmac *vmac, Vif **vif);
   50.58 -extern int vif_add(struct VnetId *vnet, Vmac *vmac, Vif **vif);
   50.59 -extern int vif_lookup(struct VnetId *vnet, Vmac *vmac, Vif **vif);
   50.60 -extern int vif_remove(struct VnetId *vnet, Vmac *vmac);
   50.61 +extern void vif_decref(struct Vif *vif);
   50.62 +extern void vif_incref(struct Vif *vif);
   50.63 +
   50.64 +extern int vif_create(struct VnetId *vnet, struct Vmac *vmac, int flags, struct Vif **vif);
   50.65 +extern int vif_lookup(struct VnetId *vnet, struct Vmac *vmac, struct Vif **vif);
   50.66 +extern int vif_update(struct VnetId *vnet, struct Vmac *vmac);
   50.67 +extern int vif_remove(struct VnetId *vnet, struct Vmac *vmac);
   50.68  extern void vif_purge(void);
   50.69 +extern int vif_remove_vnet(struct VnetId *vnet);
   50.70  
   50.71  extern int vif_init(void);
   50.72  extern void vif_exit(void);
    51.1 --- a/tools/vnet/vnet-module/vnet.c	Thu Feb 09 16:09:00 2006 +0100
    51.2 +++ b/tools/vnet/vnet-module/vnet.c	Thu Feb 09 16:12:11 2006 +0100
    51.3 @@ -1,5 +1,5 @@
    51.4  /*
    51.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    51.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    51.7   *
    51.8   * This program is free software; you can redistribute it and/or modify
    51.9   * it under the terms of the GNU General Public License as published by the 
   51.10 @@ -17,6 +17,7 @@
   51.11   *
   51.12   */
   51.13  
   51.14 +#ifdef __KERNEL__
   51.15  #include <linux/config.h>
   51.16  #include <linux/module.h>
   51.17  #include <linux/types.h>
   51.18 @@ -25,6 +26,7 @@
   51.19  #include <linux/errno.h>
   51.20  
   51.21  #include <linux/string.h>
   51.22 +#include <linux/spinlock.h>
   51.23  
   51.24  #include <linux/net.h>
   51.25  #include <linux/in.h>
   51.26 @@ -38,6 +40,22 @@
   51.27  #include <linux/skbuff.h>
   51.28  #include <net/checksum.h>
   51.29  
   51.30 +
   51.31 +#else 
   51.32 +
   51.33 +#include <netinet/in.h>
   51.34 +#include <arpa/inet.h>
   51.35 +
   51.36 +#include "sys_kernel.h"
   51.37 +#include "spinlock.h"
   51.38 +#include "skbuff.h"
   51.39 +
   51.40 +#include <linux/ip.h>  // For struct iphdr.
   51.41 +
   51.42 +extern int netif_rx(struct sk_buff *skb);
   51.43 +
   51.44 +#endif
   51.45 +
   51.46  #include <tunnel.h>
   51.47  #include <sa.h>
   51.48  #include <varp.h>
   51.49 @@ -45,16 +63,22 @@
   51.50  #include <esp.h>
   51.51  #include <etherip.h>
   51.52  #include <random.h>
   51.53 -#include <tunnel.h>
   51.54 +
   51.55 +#include <skb_context.h>
   51.56  
   51.57  #include <skb_util.h>
   51.58  #include <vnet_dev.h>
   51.59  #include <vnet.h>
   51.60 +#include <vnet_forward.h>
   51.61  #include <vif.h>
   51.62  #include <vnet_ioctl.h>
   51.63 +#include <sa.h>
   51.64 +#ifdef __KERNEL__
   51.65  #include <sa_algorithm.h>
   51.66 +#endif
   51.67  
   51.68  #include "allocate.h"
   51.69 +#include "iostream.h"
   51.70  #include "hash_table.h"
   51.71  #include "sys_net.h"
   51.72  #include "sys_string.h"
   51.73 @@ -68,19 +92,18 @@
   51.74   */
   51.75  int vnet_security_default = SA_AUTH ; //| SA_CONF;
   51.76  
   51.77 -/** Key for entries in the vnet address table. */
   51.78 -typedef struct VnetAddrKey {
   51.79 -    /** Vnet id. */
   51.80 -    VnetId vnet;
   51.81 -    /** MAC address. */
   51.82 -    unsigned char mac[ETH_ALEN];
   51.83 -} VnetAddrKey;
   51.84 -
   51.85  /** The physical vnet. */
   51.86  Vnet *vnet_physical = NULL;
   51.87  
   51.88  /** Table of vnets indexed by id. */
   51.89 -static HashTable *vnet_table = NULL;
   51.90 +HashTable *vnet_table = NULL;
   51.91 +
   51.92 +rwlock_t vnet_lock = RW_LOCK_UNLOCKED;
   51.93 +
   51.94 +#define vnet_table_read_lock(flags)    read_lock_irqsave(&vnet_lock, flags)
   51.95 +#define vnet_table_read_unlock(flags)  read_unlock_irqrestore(&vnet_lock, flags)
   51.96 +#define vnet_table_write_lock(flags)   write_lock_irqsave(&vnet_lock, flags)
   51.97 +#define vnet_table_write_unlock(flags) write_unlock_irqrestore(&vnet_lock, flags)
   51.98  
   51.99  /** Decrement reference count, freeing if zero.
  51.100   *
  51.101 @@ -89,7 +112,6 @@ static HashTable *vnet_table = NULL;
  51.102  void Vnet_decref(Vnet *info){
  51.103      if(!info) return;
  51.104      if(atomic_dec_and_test(&info->refcount)){
  51.105 -        vnet_dev_remove(info);
  51.106          deallocate(info);
  51.107      }
  51.108  }
  51.109 @@ -103,26 +125,40 @@ void Vnet_incref(Vnet *info){
  51.110      atomic_inc(&info->refcount);
  51.111  }
  51.112  
  51.113 -void Vnet_print(Vnet *info)
  51.114 +void Vnet_print(Vnet *info, IOStream *io)
  51.115  {
  51.116      char vnetbuf[VNET_ID_BUF];
  51.117 +    char *security;
  51.118  
  51.119 -    printk(KERN_INFO "VNET(vnet=%s device=%s security=%c%c)\n",
  51.120 -           VnetId_ntoa(&info->vnet, vnetbuf),
  51.121 -           info->device,
  51.122 -           ((info->security & SA_AUTH) ? 'a' : '-'),
  51.123 -           ((info->security & SA_CONF) ? 'c' : '-'));
  51.124 +    if(info->security & SA_CONF){
  51.125 +        security = "conf";
  51.126 +    } else if(info->security & SA_AUTH){
  51.127 +        security = "auth";
  51.128 +    } else {
  51.129 +        security = "none";
  51.130 +    }
  51.131 +
  51.132 +    IOStream_print(io, "(vnet");
  51.133 +    IOStream_print(io, " (id %s)", VnetId_ntoa(&info->vnet, vnetbuf));
  51.134 +    IOStream_print(io, " (vnetif %s)", info->device);
  51.135 +    IOStream_print(io, " (security %s)", security);
  51.136 +    IOStream_print(io, " (header %d)", info->header_n);
  51.137 +    IOStream_print(io, ")");
  51.138  }
  51.139  
  51.140 -void vnet_print(void)
  51.141 +void vnet_print(IOStream *io)
  51.142  {
  51.143      HashTable_for_decl(entry);
  51.144      Vnet *info;
  51.145 +    unsigned long flags;
  51.146      
  51.147 +    vnet_table_read_lock(flags);
  51.148      HashTable_for_each(entry, vnet_table){
  51.149          info = entry->value;
  51.150 -        Vnet_print(info);
  51.151 +        Vnet_print(info, io);
  51.152 +        IOStream_print(io, "\n");
  51.153      }
  51.154 +    vnet_table_read_unlock(flags);
  51.155  }
  51.156  
  51.157  /** Allocate a vnet, setting reference count to 1.
  51.158 @@ -141,6 +177,21 @@ int Vnet_alloc(Vnet **info){
  51.159      return err;
  51.160  }
  51.161  
  51.162 +/** Create the virtual interface for a vnet.
  51.163 + *
  51.164 + * @param info vnet
  51.165 + * @return 0 on success, error code otherwise
  51.166 + */
  51.167 +int Vnet_create(Vnet *info){
  51.168 +    int err = 0;
  51.169 +
  51.170 +    err = vnet_dev_add(info);
  51.171 +    if(err) goto exit;
  51.172 +    err = Vnet_add(info);
  51.173 +  exit:
  51.174 +    return err;
  51.175 +}
  51.176 +    
  51.177  /** Add a vnet to the table under its vnet id.
  51.178   *
  51.179   * @param info vnet to add
  51.180 @@ -149,57 +200,139 @@ int Vnet_alloc(Vnet **info){
  51.181  int Vnet_add(Vnet *info){
  51.182      int err = 0;
  51.183      HTEntry *entry = NULL;
  51.184 -    // Vnet_del(info->vnet); //todo: Delete existing vnet info?
  51.185 +    unsigned long flags;
  51.186 +
  51.187 +    if(Vnet_lookup(&info->vnet, NULL) == 0){
  51.188 +        //todo: Delete existing vnet info?
  51.189 +        err = -EEXIST;
  51.190 +        goto exit;
  51.191 +    }
  51.192      Vnet_incref(info);
  51.193 +    vnet_table_write_lock(flags);
  51.194      entry = HashTable_add(vnet_table, &info->vnet, info);
  51.195 +    vnet_table_write_unlock(flags);
  51.196      if(!entry){
  51.197          err = -ENOMEM;
  51.198 +        vnet_dev_remove(info);
  51.199          Vnet_decref(info);
  51.200      }
  51.201 +  exit:
  51.202      return err;
  51.203  }
  51.204  
  51.205  /** Remove a vnet from the table.
  51.206 + * Also removes all vifs and varp entries for the vnet.
  51.207   *
  51.208   * @param vnet id of vnet to remove
  51.209   * @return number of vnets removed
  51.210   */
  51.211  int Vnet_del(VnetId *vnet){
  51.212 -    return HashTable_remove(vnet_table, vnet);
  51.213 +    int count;
  51.214 +    unsigned long flags;
  51.215 +    Vnet *info;
  51.216 +
  51.217 +    vnet_table_write_lock(flags);
  51.218 +    info = HashTable_get(vnet_table, vnet);
  51.219 +    count = HashTable_remove(vnet_table, vnet);
  51.220 +    vnet_table_write_unlock(flags);
  51.221 +    
  51.222 +    varp_remove_vnet(vnet);
  51.223 +    vif_remove_vnet(vnet);
  51.224 +
  51.225 +    if(info){
  51.226 +        // Can't do this in the hashtable entry free function because it runs
  51.227 +        // while we hold the vnet table lock, and the vnet tidy up calls
  51.228 +        // vnet_dev_remove(), which calls unregister_netdev(), which schedules.
  51.229 +        vnet_dev_remove(info);
  51.230 +        Vnet_decref(info);
  51.231 +    }
  51.232 +    return count;
  51.233  }
  51.234  
  51.235  /** Lookup a vnet by id.
  51.236   * References the vnet on success - the caller must decref.
  51.237   *
  51.238   * @param vnet vnet id
  51.239 - * @param info return parameter for vnet
  51.240 + * @param pinfo return parameter for vnet (or NULL)
  51.241   * @return 0 on success, -ENOENT if no vnet found
  51.242   */
  51.243 -int Vnet_lookup(VnetId *vnet, Vnet **info){
  51.244 +int Vnet_lookup(VnetId *vnet, Vnet **pinfo){
  51.245      int err = 0;
  51.246 -    *info = HashTable_get(vnet_table, vnet);
  51.247 -    if(*info){
  51.248 -        Vnet_incref(*info);
  51.249 +    unsigned long flags;
  51.250 +    Vnet *info;
  51.251 +
  51.252 +    vnet_table_read_lock(flags);
  51.253 +    info = HashTable_get(vnet_table, vnet);
  51.254 +    if(info){
  51.255 +        if(pinfo){
  51.256 +            Vnet_incref(info);
  51.257 +        }
  51.258      } else {
  51.259          err = -ENOENT;
  51.260      }
  51.261 +    vnet_table_read_unlock(flags);
  51.262 +
  51.263 +    if(pinfo){
  51.264 +        *pinfo = (err ? NULL : info);
  51.265 +    }
  51.266      return err;
  51.267  }
  51.268  
  51.269 +static int vnet_key_equal_fn(void *k1, void *k2){
  51.270 +    return memcmp(k1, k2, sizeof(VnetId)) == 0;
  51.271 +}
  51.272 +
  51.273 +static Hashcode vnet_key_hash_fn(void *k){
  51.274 +    return hash_hvoid(0, k, sizeof(VnetId));
  51.275 +}
  51.276 +
  51.277  /** Free an entry in the vnet table.
  51.278   *
  51.279   * @param table containing table
  51.280   * @param entry to free
  51.281   */
  51.282  static void vnet_entry_free_fn(HashTable *table, HTEntry *entry){
  51.283 -    Vnet *info;
  51.284      if(!entry) return;
  51.285 -    info = entry->value;
  51.286 -    if(info){
  51.287 +    HTEntry_free(entry);
  51.288 +}
  51.289 +
  51.290 +void vnet_table_free(void){
  51.291 +    HashTable *vnt;
  51.292 +    HashTable_for_decl(entry);
  51.293 +
  51.294 +    vnt = vnet_table;
  51.295 +    if(!vnt) return;
  51.296 +    vnet_table = NULL;
  51.297 +    HashTable_for_each(entry, vnt){
  51.298 +        Vnet *info = entry->value;
  51.299          vnet_dev_remove(info);
  51.300          Vnet_decref(info);
  51.301      }
  51.302 -    HTEntry_free(entry);
  51.303 +    HashTable_free(vnt);
  51.304 +}
  51.305 +
  51.306 +int vnet_table_init(void){
  51.307 +    int err = 0;
  51.308 +    vnet_table = HashTable_new(0);
  51.309 +    if(!vnet_table){
  51.310 +        err = -ENOMEM;
  51.311 +        goto exit;
  51.312 +    }
  51.313 +    vnet_table->key_equal_fn = vnet_key_equal_fn;
  51.314 +    vnet_table->key_hash_fn = vnet_key_hash_fn;
  51.315 +    vnet_table->entry_free_fn = vnet_entry_free_fn;
  51.316 +
  51.317 +    err = Vnet_alloc(&vnet_physical);
  51.318 +    if(err) goto exit;
  51.319 +    vnet_physical->vnet = toVnetId(VNET_PHYS);
  51.320 +    vnet_physical->security = 0;
  51.321 +    err = Vnet_add(vnet_physical);
  51.322 +
  51.323 +  exit:
  51.324 +    if(err){
  51.325 +        vnet_table_free();
  51.326 +    }
  51.327 +    return err;
  51.328  }
  51.329  
  51.330  /** Setup some vnet entries (for testing).
  51.331 @@ -223,22 +356,12 @@ static int vnet_setup(void){
  51.332          sprintf(vnet->device, "vnif%04x", vnetid);
  51.333          vnet->security = (vnetid > 10 ? security : 0);
  51.334          err = Vnet_create(vnet);
  51.335 +        Vnet_decref(vnet);
  51.336          if(err) break;
  51.337      }
  51.338      return err;
  51.339  }
  51.340  
  51.341 -int vnet_key_equal_fn(void *k1, void *k2){
  51.342 -    VnetId *key1 = k1;
  51.343 -    VnetId *key2 = k2;
  51.344 -    return VnetId_eq(key1, key2);
  51.345 -}
  51.346 -
  51.347 -Hashcode vnet_key_hash_fn(void *k){
  51.348 -    VnetId *key = k;
  51.349 -    return VnetId_hash(0, key);
  51.350 -}
  51.351 -
  51.352  /** Initialize the vnet table and the physical vnet.
  51.353   *
  51.354   * @return 0 on success, error code otherwise
  51.355 @@ -246,43 +369,38 @@ Hashcode vnet_key_hash_fn(void *k){
  51.356  int vnet_init(void){
  51.357      int err = 0;
  51.358  
  51.359 -    vnet_table = HashTable_new(0);
  51.360 -    if(!vnet_table){
  51.361 -        err = -ENOMEM;
  51.362 -        goto exit;
  51.363 -    }
  51.364 -    vnet_table->key_equal_fn = vnet_key_equal_fn;
  51.365 -    vnet_table->key_hash_fn = vnet_key_hash_fn;
  51.366 -    vnet_table->entry_free_fn = vnet_entry_free_fn;
  51.367 -
  51.368 -    err = Vnet_alloc(&vnet_physical);
  51.369 +    err = vnet_forward_init();
  51.370      if(err) goto exit;
  51.371 -    vnet_physical->vnet = toVnetId(VNET_PHYS);
  51.372 -    vnet_physical->security = 0;
  51.373 -    err = Vnet_add(vnet_physical);
  51.374 +    err = vnet_table_init();
  51.375      if(err) goto exit;
  51.376      err = vnet_setup();
  51.377      if(err) goto exit;
  51.378 +    err = vif_init();
  51.379 +    if(err) goto exit;
  51.380      err = varp_init();
  51.381 -    if(err) goto exit;
  51.382 -    err = vif_init();
  51.383    exit:
  51.384      return err;
  51.385  }
  51.386  
  51.387  void vnet_exit(void){
  51.388 -    vif_exit();
  51.389      varp_exit();
  51.390 -    HashTable_free(vnet_table);
  51.391 -    vnet_table = NULL;
  51.392 +    vif_exit();
  51.393 +    vnet_table_free();
  51.394 +    vnet_forward_exit();
  51.395  }
  51.396  
  51.397 -inline int skb_xmit(struct sk_buff *skb){
  51.398 +#ifdef __KERNEL__
  51.399 +inline int _skb_xmit(struct sk_buff *skb, uint32_t saddr){
  51.400      int err = 0;
  51.401      struct rtable *rt = NULL;
  51.402  
  51.403 -    dprintf(">\n");
  51.404 +    dprintf("> src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
  51.405 +            NIPQUAD(skb->nh.iph->saddr),
  51.406 +            NIPQUAD(skb->nh.iph->daddr));
  51.407      skb->protocol = htons(ETH_P_IP);
  51.408 +    if(saddr){
  51.409 +        skb->nh.iph->saddr = 0;
  51.410 +    }
  51.411      err = skb_route(skb, &rt);
  51.412      if(err){
  51.413          wprintf("> skb_route=%d\n", err);
  51.414 @@ -295,6 +413,7 @@ inline int skb_xmit(struct sk_buff *skb)
  51.415                  
  51.416          goto exit;
  51.417      }
  51.418 +    dst_release(skb->dst);
  51.419      skb->dst = &rt->u.dst;
  51.420      if(!skb->dev){
  51.421          skb->dev = rt->u.dst.dev;
  51.422 @@ -302,108 +421,104 @@ inline int skb_xmit(struct sk_buff *skb)
  51.423  
  51.424      ip_select_ident(skb->nh.iph, &rt->u.dst, NULL);
  51.425  
  51.426 -    if(skb->nh.iph->saddr == 0){
  51.427 -        skb->nh.iph->saddr = rt->rt_src;
  51.428 +    if(saddr){
  51.429 +        skb->nh.iph->saddr = saddr;
  51.430 +    } else {
  51.431 +        if(!skb->nh.iph->saddr){
  51.432 +            skb->nh.iph->saddr = rt->rt_src;
  51.433 +        }
  51.434      }
  51.435  
  51.436 -    skb->nh.iph->check = 0;
  51.437 -    skb->nh.iph->check = ip_compute_csum(skb->nh.raw, (skb->nh.iph->ihl << 2));
  51.438 +    ip_send_check(skb->nh.iph);
  51.439  
  51.440 -    err = neigh_compat_output(skb);
  51.441 +    if(1){
  51.442 +        // Output to skb destination. Will use ip_output(), which fragments.
  51.443 +        // Slightly slower than neigh_compat_output() (marginal - 1%).
  51.444 +        err = dst_output(skb); 
  51.445 +    } else {
  51.446 +        // Sends direct to device via dev_queue_xmit(). No fragmentation?
  51.447 +        err = neigh_compat_output(skb);
  51.448 +    }
  51.449  
  51.450 +#if 0
  51.451 +    if(needs_frags){
  51.452 +        err = ip_fragment(skb, ip_finish_output);
  51.453 +    } else {
  51.454 +        err = ip_finish_output(skb);
  51.455 +    }
  51.456 +#endif
  51.457    exit:
  51.458      dprintf("< err=%d\n", err);
  51.459      return err;
  51.460  }
  51.461  
  51.462 +#else 
  51.463 +
  51.464 +extern int _skb_xmit(struct sk_buff *skb, uint32_t saddr);
  51.465 +
  51.466 +#endif
  51.467 +
  51.468 +int skb_xmit(struct sk_buff *skb){
  51.469 +    if(MULTICAST(skb->nh.iph->daddr)){
  51.470 +        vnet_forward_send(skb);
  51.471 +    }
  51.472 +    return _skb_xmit(skb, 0);
  51.473 +}
  51.474 +
  51.475  /** Called when a vif sends a packet to the network.
  51.476   * Encapsulates the packet for its vnet and forwards it.
  51.477   *
  51.478   * @param skb packet
  51.479   * @return 0 on success, error code otherwise
  51.480   *
  51.481 - * @todo fixme
  51.482   */
  51.483  int vnet_skb_send(struct sk_buff *skb, VnetId *vnet){
  51.484 -    int err = 0;
  51.485      VnetId vnet_phys = toVnetId(VNET_PHYS);
  51.486 +    int err = 0;
  51.487  
  51.488 -    dprintf(">\n");
  51.489 +    //dprintf(">\n");
  51.490      skb->dev = NULL;
  51.491      if(!vnet || VnetId_eq(vnet, &vnet_phys)){
  51.492          // No vnet or physical vnet, send direct to the network. 
  51.493          skb_xmit(skb);
  51.494      } else {
  51.495 +        // Update the vif table with the source MAC.
  51.496 +        vif_update(vnet, (Vmac*)eth_hdr(skb)->h_source);
  51.497          err = varp_output(skb, vnet);
  51.498      }
  51.499 -    dprintf("< err=%d\n", err);
  51.500 +    //dprintf("< err=%d\n", err);
  51.501      return err;
  51.502  }
  51.503  
  51.504  /** Receive an skb for a vnet.
  51.505   * We make the skb come out of the vif for the vnet, and
  51.506   * let ethernet bridging forward it to related interfaces.
  51.507 - * If the dest is broadcast, goes to all vifs on the vnet.
  51.508 - * If the dest is unicast, goes to the addressed vif on the vnet.
  51.509   *
  51.510   * The packet must have skb->mac.raw set and skb->data must point
  51.511   * after the device (ethernet) header.
  51.512   *
  51.513 + * Return code 1 means we now own the packet - the caller must not free it.
  51.514 + * Return code < 0 means an error - caller still owns the packet.
  51.515 + *
  51.516   * @param skb packet
  51.517   * @param vnet packet vnet
  51.518 - * @param vmac packet vmac
  51.519 - * @return 0 on success, error code otherwise
  51.520   */
  51.521 -int vnet_skb_recv(struct sk_buff *skb, VnetId *vnet, Vmac *vmac){
  51.522 -    int err = 0;
  51.523 -    Vnet *info = NULL;
  51.524 +int vnet_skb_recv(struct sk_buff *skb, Vnet *vnet){
  51.525 +    int err = 1;
  51.526  
  51.527 -    err = Vnet_lookup(vnet, &info);
  51.528 -    if(err) goto exit;
  51.529 -    skb->dev = info->dev;
  51.530 +    if(!vnet->dev){
  51.531 +        // No device for the vnet.
  51.532 +        err = -ENOTCONN;
  51.533 +        goto exit;
  51.534 +    }
  51.535 +    skb->dev = vnet->dev;
  51.536 +    vnet->stats.rx_packets++;
  51.537 +    vnet->stats.rx_bytes += skb->len;
  51.538      netif_rx(skb);
  51.539    exit:
  51.540 -    if(info) Vnet_decref(info);
  51.541 -    if(err){
  51.542 -        kfree_skb(skb);
  51.543 -    }
  51.544      return err;
  51.545  }
  51.546  
  51.547 -/** Determine ESP security mode for a new SA.
  51.548 - *
  51.549 - * @param spi incoming spi
  51.550 - * @param protocol incoming protocol
  51.551 - * @param addr source address
  51.552 - * @return security level or negative error code
  51.553 - *
  51.554 - * @todo Need to check spi, and do some lookup for security params.
  51.555 - */
  51.556 -int vnet_sa_security(u32 spi, int protocol, u32 addr){
  51.557 -    int security = vnet_security_default;
  51.558 -    dprintf("< security=%x\n", security);
  51.559 -    return security;
  51.560 -}
  51.561 -
  51.562 -/** Create a new SA for incoming traffic.
  51.563 - *
  51.564 - * @param spi incoming spi
  51.565 - * @param protocol incoming protocol
  51.566 - * @param addr source address
  51.567 - * @param sa return parameter for SA
  51.568 - * @return 0 on success, error code otherwise
  51.569 - */
  51.570 -int vnet_sa_create(u32 spi, int protocol, u32 addr, SAState **sa){
  51.571 -    int err = 0;
  51.572 -    int security = vnet_sa_security(spi, protocol, addr);
  51.573 -    if(security < 0){
  51.574 -        err = security;
  51.575 -        goto exit;
  51.576 -    }
  51.577 -    err = sa_create(security, spi, protocol, addr, sa);
  51.578 -  exit:
  51.579 -    return err;
  51.580 -}
  51.581  
  51.582  /** Check that a context has the correct properties w.r.t. a vnet.
  51.583   * The context must be secure if the vnet requires security.
  51.584 @@ -443,104 +558,33 @@ int vnet_check_context(VnetId *vnet, Skb
  51.585      return err;
  51.586  }
  51.587  
  51.588 -/** Open function for SA tunnels.
  51.589 - *
  51.590 - * @param tunnel to open
  51.591 - * @return 0 on success, error code otherwise
  51.592 - */
  51.593 -static int sa_tunnel_open(Tunnel *tunnel){
  51.594 -    int err = 0;
  51.595 -    //dprintf(">\n");
  51.596 -    //dprintf("< err=%d\n", err);
  51.597 -    return err;
  51.598 -}
  51.599 -
  51.600 -/** Close function for SA tunnels.
  51.601 - *
  51.602 - * @param tunnel to close (OK if null)
  51.603 - */
  51.604 -static void sa_tunnel_close(Tunnel *tunnel){
  51.605 -    SAState *sa;
  51.606 -    if(!tunnel) return;
  51.607 -    sa = tunnel->data;
  51.608 -    if(!sa) return;
  51.609 -    SAState_decref(sa);
  51.610 -    tunnel->data = NULL;
  51.611 -}
  51.612  
  51.613 -/** Packet send function for SA tunnels.
  51.614 - *
  51.615 - * @param tunnel to send on
  51.616 - * @param skb packet to send
  51.617 - * @return 0 on success, negative error code on error
  51.618 - */
  51.619 -static int sa_tunnel_send(Tunnel *tunnel, struct sk_buff *skb){
  51.620 -    int err = -EINVAL;
  51.621 -    SAState *sa;
  51.622 -    if(!tunnel){
  51.623 -        wprintf("> Null tunnel!\n");
  51.624 -        goto exit;
  51.625 -    }
  51.626 -    sa = tunnel->data;
  51.627 -    if(!sa){
  51.628 -        wprintf("> Null SA!\n");
  51.629 -        goto exit;
  51.630 -    }
  51.631 -    err = SAState_send(sa, skb, tunnel->base);
  51.632 -  exit:
  51.633 -    return err;
  51.634 -}
  51.635 -
  51.636 -/** Functions used by SA tunnels. */
  51.637 -static TunnelType _sa_tunnel_type = {
  51.638 -    .name	= "SA",
  51.639 -    .open	= sa_tunnel_open,
  51.640 -    .close	= sa_tunnel_close,
  51.641 -    .send 	= sa_tunnel_send
  51.642 -};
  51.643 -
  51.644 -/** Functions used by SA tunnels. */
  51.645 -TunnelType *sa_tunnel_type = &_sa_tunnel_type;
  51.646 -
  51.647 -/** Open a tunnel for a vnet to a given address.
  51.648 +/** Create a tunnel for a vnet to a given address.
  51.649   *
  51.650   * @param vnet vnet id
  51.651   * @param addr destination address
  51.652   * @param tunnel return parameter
  51.653   * @return 0 on success, error code otherwise
  51.654   */
  51.655 -int vnet_tunnel_open(VnetId *vnet, VarpAddr *addr, Tunnel **tunnel){
  51.656 -    extern TunnelType *etherip_tunnel_type;
  51.657 +static int vnet_tunnel_create(VnetId *vnet, VarpAddr *addr, Tunnel **tunnel){
  51.658      int err = 0;
  51.659      Vnet *info = NULL;
  51.660 -    Tunnel *base_tunnel = NULL;
  51.661 +    Tunnel *base = NULL;
  51.662      Tunnel *sa_tunnel = NULL;
  51.663 -    Tunnel *etherip_tunnel = NULL;
  51.664 +    Tunnel *eth_tunnel = NULL;
  51.665  
  51.666      err = Vnet_lookup(vnet, &info);
  51.667      if(err) goto exit;
  51.668      if(info->security){
  51.669 -        SAState *sa = NULL;
  51.670 -        //FIXME: Assuming IPv4 for now.
  51.671 -        u32 ipaddr = addr->u.ip4.s_addr;
  51.672 -        err = Tunnel_create(sa_tunnel_type, vnet, addr, base_tunnel, &sa_tunnel);
  51.673 +        err = sa_tunnel_create(info, addr, base, &sa_tunnel);
  51.674          if(err) goto exit;
  51.675 -        err = sa_create(info->security, 0, IPPROTO_ESP, ipaddr, &sa);
  51.676 -        if(err) goto exit;
  51.677 -        sa_tunnel->data = sa;
  51.678 -        base_tunnel = sa_tunnel;
  51.679 +        base = sa_tunnel;
  51.680      }
  51.681 -    err = Tunnel_create(etherip_tunnel_type, vnet, addr, base_tunnel, &etherip_tunnel);
  51.682 -    if(err) goto exit;
  51.683 -    err = Tunnel_add(etherip_tunnel);
  51.684 +    err = etherip_tunnel_create(vnet, addr, base, &eth_tunnel);
  51.685    exit:
  51.686      Tunnel_decref(sa_tunnel);
  51.687      Vnet_decref(info);
  51.688 -    if(err){
  51.689 -        *tunnel = NULL;
  51.690 -    } else {
  51.691 -        *tunnel = etherip_tunnel;
  51.692 -    }
  51.693 +    *tunnel = (err ? NULL : eth_tunnel);
  51.694      return err;
  51.695  }
  51.696  
  51.697 @@ -554,9 +598,9 @@ int vnet_tunnel_open(VnetId *vnet, VarpA
  51.698   */
  51.699  int vnet_tunnel_lookup(VnetId *vnet, VarpAddr *addr, Tunnel **tunnel){
  51.700      int err = 0;
  51.701 -    *tunnel = Tunnel_lookup(vnet, addr);
  51.702 -    if(!*tunnel){
  51.703 -        err = vnet_tunnel_open(vnet, addr, tunnel);
  51.704 +    err = Tunnel_lookup(vnet, addr, tunnel);
  51.705 +    if(err){
  51.706 +        err = Tunnel_open(vnet, addr, vnet_tunnel_create, tunnel);
  51.707      }
  51.708      return err;
  51.709  }
  51.710 @@ -571,14 +615,28 @@ int vnet_tunnel_lookup(VnetId *vnet, Var
  51.711  int vnet_tunnel_send(VnetId *vnet, VarpAddr *addr, struct sk_buff *skb){
  51.712      int err = 0;
  51.713      Tunnel *tunnel = NULL;
  51.714 +
  51.715      err = vnet_tunnel_lookup(vnet, addr, &tunnel);
  51.716 -    if(err) goto exit;
  51.717 +    if(err) {
  51.718 +        char vnetbuf[VNET_ID_BUF];
  51.719 +        char addrbuf[VARP_ADDR_BUF];
  51.720 +        wprintf("No tunnel: skb=%p vnet=%s addr=%s\n",
  51.721 +                skb,
  51.722 +                VnetId_ntoa(vnet, vnetbuf),
  51.723 +                VarpAddr_ntoa(addr, addrbuf));
  51.724 +        goto exit;
  51.725 +    }
  51.726      err = Tunnel_send(tunnel, skb);
  51.727      Tunnel_decref(tunnel);
  51.728    exit:
  51.729      return err;
  51.730  }
  51.731  
  51.732 +#ifdef __KERNEL__
  51.733 +
  51.734 +/** Module parameter for vnet encapsulation. */
  51.735 +static char *vnet_encaps = NULL;
  51.736 +
  51.737  static void __exit vnet_module_exit(void){
  51.738      ProcFS_exit();
  51.739      sa_table_exit();
  51.740 @@ -597,6 +655,9 @@ static void __exit vnet_module_exit(void
  51.741  static int __init vnet_module_init(void){
  51.742      int err = 0;
  51.743  
  51.744 +    if(vnet_encaps && !strcmp(vnet_encaps, "udp")){
  51.745 +        etherip_in_udp = 1;
  51.746 +    }
  51.747      dprintf(">\n");
  51.748      err = random_module_init();
  51.749      if(err) wprintf("> random_module_init err=%d\n", err);
  51.750 @@ -629,3 +690,8 @@ static int __init vnet_module_init(void)
  51.751  module_init(vnet_module_init);
  51.752  module_exit(vnet_module_exit);
  51.753  MODULE_LICENSE("GPL");
  51.754 +
  51.755 +MODULE_PARM(vnet_encaps, "s");
  51.756 +MODULE_PARM_DESC(vnet_encaps, "Vnet encapsulation: etherip or udp.");
  51.757 +
  51.758 +#endif
    52.1 --- a/tools/vnet/vnet-module/vnet.h	Thu Feb 09 16:09:00 2006 +0100
    52.2 +++ b/tools/vnet/vnet-module/vnet.h	Thu Feb 09 16:12:11 2006 +0100
    52.3 @@ -1,5 +1,5 @@
    52.4  /*
    52.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    52.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    52.7   *
    52.8   * This program is free software; you can redistribute it and/or modify
    52.9   * it under the terms of the GNU General Public License as published by the 
   52.10 @@ -19,61 +19,83 @@
   52.11  #ifndef __VNET_VNET_H__
   52.12  #define __VNET_VNET_H__
   52.13  
   52.14 +#ifdef __KERNEL__
   52.15 +
   52.16  #include <asm/atomic.h>
   52.17  #include <linux/skbuff.h>
   52.18 +#include <linux/if.h>
   52.19 +#include <linux/netdevice.h>
   52.20  
   52.21 -#include <tunnel.h>
   52.22 -#include <skb_context.h>
   52.23 +#else
   52.24  
   52.25 +#include <linux/netdevice.h> // struct net_device_stats
   52.26 +
   52.27 +struct net_device {
   52.28 +    char name[IFNAMSIZ];
   52.29 +    char tap[255];
   52.30 +    int  tapfd;
   52.31 +};
   52.32 +
   52.33 +#endif
   52.34 +
   52.35 +#include <if_varp.h>
   52.36 +
   52.37 +struct sk_buff;
   52.38 +
   52.39 +struct IOStream;
   52.40  struct Vmac;
   52.41  struct Vif;
   52.42 -struct net_device;
   52.43 +struct SkbContext;
   52.44 +struct VarpAddr;
   52.45 +struct Tunnel;
   52.46 +struct SAState;
   52.47  
   52.48  /** Vnet property record. */
   52.49  typedef struct Vnet {
   52.50 +    /** Vnet id. */
   52.51 +    struct VnetId vnet;
   52.52      /** Reference count. */
   52.53      atomic_t refcount;
   52.54 -    /** Vnet id. */
   52.55 -    struct VnetId vnet;
   52.56      /** Security flag. If true the vnet requires ESP. */
   52.57      int security;
   52.58      char device[IFNAMSIZ];
   52.59  
   52.60      struct net_device *dev;
   52.61 -    struct net_device *bridge;
   52.62      
   52.63      /** Max size of the header. */
   52.64      int header_n;
   52.65 +    int mtu;
   52.66      /** Statistics. */
   52.67      struct net_device_stats stats;
   52.68      int recursion;
   52.69  } Vnet;
   52.70  
   52.71 -extern void vnet_print(void);
   52.72 -extern void Vnet_print(Vnet *info);
   52.73 +extern void vnet_print(struct IOStream *io);
   52.74 +extern void Vnet_print(struct Vnet *info, struct IOStream *io);
   52.75  
   52.76  extern int Vnet_lookup(struct VnetId *vnet, struct Vnet **info);
   52.77 +extern int Vnet_create(struct Vnet *info);
   52.78  extern int Vnet_add(struct Vnet *info);
   52.79  extern int Vnet_del(struct VnetId *vnet);
   52.80  extern void Vnet_incref(struct Vnet *info);
   52.81  extern void Vnet_decref(struct Vnet *info);
   52.82  extern int Vnet_alloc(struct Vnet **info);
   52.83 -extern Vnet *vnet_physical;
   52.84 +extern struct Vnet *vnet_physical;
   52.85  
   52.86  extern int skb_xmit(struct sk_buff *skb);
   52.87 +extern int skb_xmit_fwd(struct sk_buff *skb);
   52.88  extern int vnet_skb_send(struct sk_buff *skb, struct VnetId *vnet);
   52.89 -extern int vnet_skb_recv(struct sk_buff *skb, struct VnetId *vnet, struct Vmac *vmac);
   52.90 +extern int vnet_skb_recv(struct sk_buff *skb, struct Vnet *vnet);
   52.91  
   52.92 -extern int vnet_check_context(struct VnetId *vnet, SkbContext *context, Vnet **vinfo);
   52.93 +extern int vnet_check_context(struct VnetId *vnet, struct SkbContext *context, struct Vnet **vinfo);
   52.94  
   52.95 -extern int vnet_tunnel_open(struct VnetId *vnet, struct VarpAddr *addr, Tunnel **tunnel);
   52.96 -extern int vnet_tunnel_lookup(struct VnetId *vnet, struct VarpAddr *addr, Tunnel **tunnel);
   52.97 +extern int vnet_tunnel_open(struct VnetId *vnet, struct VarpAddr *addr, struct Tunnel **tunnel);
   52.98 +extern int vnet_tunnel_lookup(struct VnetId *vnet, struct VarpAddr *addr, struct Tunnel **tunnel);
   52.99  extern int vnet_tunnel_send(struct VnetId *vnet, struct VarpAddr *addr, struct sk_buff *skb);
  52.100  
  52.101  extern int vnet_init(void);
  52.102  
  52.103  extern int vnet_sa_security(u32 spi, int protocol, u32 addr);
  52.104 -struct SAState;
  52.105  extern int vnet_sa_create(u32 spi, int protocol, u32 addr, struct SAState **sa);
  52.106  
  52.107  enum {
  52.108 @@ -81,4 +103,6 @@ enum {
  52.109      VNET_VIF = 2,
  52.110  };
  52.111  
  52.112 +extern struct HashTable *vnet_table;
  52.113 +
  52.114  #endif /* !__VNET_VNET_H__ */
    53.1 --- a/tools/vnet/vnet-module/vnet_dev.c	Thu Feb 09 16:09:00 2006 +0100
    53.2 +++ b/tools/vnet/vnet-module/vnet_dev.c	Thu Feb 09 16:12:11 2006 +0100
    53.3 @@ -1,5 +1,5 @@
    53.4  /*
    53.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    53.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    53.7   *
    53.8   * This program is free software; you can redistribute it and/or modify
    53.9   * it under the terms of the GNU General Public License as published by the 
   53.10 @@ -42,6 +42,7 @@
   53.11  #include <varp.h>
   53.12  #include <vif.h>
   53.13  #include <vnet_dev.h>
   53.14 +#include <random.h>
   53.15  
   53.16  #define MODULE_NAME "VNET"
   53.17  #define DEBUG 1
   53.18 @@ -49,51 +50,31 @@
   53.19  #include "debug.h"
   53.20  
   53.21  #ifndef CONFIG_BRIDGE
   53.22 -#error Must configure ethernet bridging in Network Options
   53.23 +#warning Should configure ethernet bridging in kernel Network Options
   53.24  #endif
   53.25  
   53.26  static void vnet_dev_destructor(struct net_device *dev){
   53.27 -    dprintf(">\n");
   53.28 -    dev->open                 = NULL;
   53.29 -    dev->stop                 = NULL;
   53.30 -    dev->uninit               = NULL;
   53.31 -    dev->destructor           = NULL;
   53.32 -    dev->hard_start_xmit      = NULL;
   53.33 -    dev->get_stats            = NULL;
   53.34 -    dev->do_ioctl             = NULL;
   53.35 -    dev->change_mtu           = NULL;
   53.36 -
   53.37 -    dev->tx_timeout           = NULL;
   53.38 -    dev->set_multicast_list   = NULL;
   53.39 -    dev->flags                = 0;
   53.40 -
   53.41 -    dev->priv                 = NULL;
   53.42 -}
   53.43 -
   53.44 -static void vnet_dev_uninit(struct net_device *dev){
   53.45 -    //Vnet *vnet = dev->priv;
   53.46 -    dprintf(">\n");
   53.47 -    //dev_put(dev);
   53.48 -    dprintf("<\n");
   53.49 +    Vnet *vnet = dev->priv;
   53.50 +    if(vnet){
   53.51 +        if(vnet->dev == dev){
   53.52 +            vnet->dev = NULL;
   53.53 +        }
   53.54 +        dev->priv = NULL;
   53.55 +        Vnet_decref(vnet);
   53.56 +    }
   53.57 +    free_netdev(dev);
   53.58  }
   53.59  
   53.60  static struct net_device_stats *vnet_dev_get_stats(struct net_device *dev){
   53.61 +    static struct net_device_stats stats = {};
   53.62      Vnet *vnet = dev->priv;
   53.63 -    //dprintf(">\n");
   53.64 -    return &vnet->stats;
   53.65 -}
   53.66 -
   53.67 -static int vnet_dev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd){
   53.68 -    int err = 0;
   53.69 -    
   53.70 -    dprintf(">\n");
   53.71 -    return err;
   53.72 +    return (vnet ? &vnet->stats : &stats);
   53.73  }
   53.74  
   53.75  static int vnet_dev_change_mtu(struct net_device *dev, int mtu){
   53.76      int err = 0;
   53.77      Vnet *vnet = dev->priv;
   53.78 -    if (mtu < 68 || mtu > 1500 - vnet->header_n){
   53.79 +    if (mtu < 68 || mtu > (vnet ? vnet->mtu : 1500)){
   53.80          err = -EINVAL;
   53.81          goto exit;
   53.82      }
   53.83 @@ -102,64 +83,29 @@ static int vnet_dev_change_mtu(struct ne
   53.84      return err;
   53.85  }
   53.86  
   53.87 -static int vnet_dev_set_name(struct net_device *dev){
   53.88 -    int err = 0;
   53.89 -    Vnet *vnet = (void*)dev->priv;
   53.90 -
   53.91 -    dprintf(">\n");
   53.92 -    if(__dev_get_by_name(vnet->device)){
   53.93 -        err = -ENOMEM;
   53.94 -        wprintf("> vnet device name in use: %s\n", vnet->device);
   53.95 -    }
   53.96 -    strcpy(dev->name, vnet->device);
   53.97 -    dprintf("< err=%d\n", err);
   53.98 -    return err;
   53.99 -}
  53.100 -
  53.101 -/** Create the virtual interface for a vnet.
  53.102 - *
  53.103 - * @param info vnet
  53.104 - * @return 0 on success, error code otherwise
  53.105 - */
  53.106 -int Vnet_create(Vnet *info){
  53.107 -    int err = 0;
  53.108 -
  53.109 -    err = vnet_dev_add(info);
  53.110 -    if(err) goto exit;
  53.111 -    err = Vnet_add(info);
  53.112 -  exit:
  53.113 -    return err;
  53.114 -}
  53.115 -    
  53.116  /** Remove the net device for a vnet.
  53.117 - * Clears the dev field of the vnet.
  53.118   * Safe to call if the vnet or its dev are null.
  53.119   *
  53.120   * @param vnet vnet
  53.121   */
  53.122  void vnet_dev_remove(Vnet *vnet){
  53.123 -    if(!vnet) return;
  53.124 -    if(vnet->dev){
  53.125 -        //dev_put(vnet->dev);
  53.126 -        dprintf("> unregister_netdev(%s)\n", vnet->dev->name);
  53.127 +    if(vnet && vnet->dev){
  53.128 +        iprintf("> Removing vnet device %s\n", vnet->dev->name);
  53.129          unregister_netdev(vnet->dev);
  53.130 -        vnet->dev = NULL;
  53.131      }
  53.132  }
  53.133  
  53.134  static int vnet_dev_open(struct net_device *dev){
  53.135      int err = 0;
  53.136 -    dprintf(">\n");
  53.137 +
  53.138      netif_start_queue(dev);
  53.139 -    dprintf("<\n");
  53.140      return err;
  53.141  }
  53.142  
  53.143  static int vnet_dev_stop(struct net_device *dev){
  53.144      int err = 0;
  53.145 -    dprintf(">\n");
  53.146 +
  53.147      netif_stop_queue(dev);
  53.148 -    dprintf("<\n");
  53.149      return err;
  53.150  }
  53.151  
  53.152 @@ -168,25 +114,28 @@ static int vnet_dev_hard_start_xmit(stru
  53.153      Vnet *vnet = dev->priv;
  53.154      int len = 0;
  53.155  
  53.156 -    dprintf("> skb=%p\n", skb);
  53.157 +    if(!skb){
  53.158 +        wprintf("> skb NULL!\n");
  53.159 +        return -EINVAL;
  53.160 +    }
  53.161 +    if(!vnet){
  53.162 +        return -ENOTCONN;
  53.163 +    }
  53.164      if(vnet->recursion++) {
  53.165 +        extern void print_skb(const char *msg, int count, struct sk_buff *skb);
  53.166 +        char vnetbuf[VNET_ID_BUF];
  53.167 +        
  53.168          vnet->stats.collisions++;
  53.169  	vnet->stats.tx_errors++;
  53.170 -        wprintf("> recursion!\n");
  53.171 -	dev_kfree_skb(skb);
  53.172 +        wprintf("> recursion! vnet=%s\n", VnetId_ntoa(&vnet->vnet, vnetbuf));
  53.173 +        print_skb("RECURSION", 0, skb);
  53.174 +        varp_print(iostdout);
  53.175 +	kfree_skb(skb);
  53.176          goto exit;
  53.177      }
  53.178 -    if(!skb){
  53.179 -        err = -EINVAL;
  53.180 -        wprintf("> skb NULL!\n");
  53.181 -        goto exit;
  53.182 -    }
  53.183 -    dprintf("> skb->data=%p skb->mac.raw=%p\n", skb->data, skb->mac.raw);
  53.184 -    if(skb->mac.raw < skb->data || skb->mac.raw > skb->nh.raw){
  53.185 -        wprintf("> skb mac duff!\n");
  53.186 +    if(!skb->mac.raw){
  53.187          skb->mac.raw = skb->data;
  53.188 -    }
  53.189 -    //dev->trans_start = jiffies;
  53.190 +    }        
  53.191      len = skb->len;
  53.192      // Must not use skb pointer after vnet_skb_send().
  53.193      err = vnet_skb_send(skb, &vnet->vnet);
  53.194 @@ -198,20 +147,24 @@ static int vnet_dev_hard_start_xmit(stru
  53.195      }
  53.196    exit:
  53.197      vnet->recursion--;
  53.198 -    dprintf("<\n");
  53.199      return 0;
  53.200  }
  53.201  
  53.202 +void vnet_dev_set_multicast_list(struct net_device *dev){
  53.203 +}
  53.204 +
  53.205 +#if 0
  53.206 +static int vnet_dev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd){
  53.207 +    int err = 0;
  53.208 +    
  53.209 +    return err;
  53.210 +}
  53.211 +
  53.212  void vnet_dev_tx_timeout(struct net_device *dev){
  53.213 -    dprintf(">\n");
  53.214      //dev->trans_start = jiffies;
  53.215      //netif_wake_queue(dev);
  53.216  }
  53.217  
  53.218 -void vnet_dev_set_multicast_list(struct net_device *dev){
  53.219 -    dprintf(">\n");
  53.220 -}
  53.221 -
  53.222  static int (*eth_hard_header)(struct sk_buff *skb,
  53.223                                struct net_device *dev, unsigned short type,
  53.224                                void *daddr, void *saddr, unsigned len) = NULL;
  53.225 @@ -227,18 +180,7 @@ static int vnet_dev_hard_header(struct s
  53.226    exit:
  53.227      return err;
  53.228  }
  53.229 -
  53.230 -void vnet_default_mac(unsigned char *mac)
  53.231 -{
  53.232 -    static unsigned val = 1;
  53.233 -    mac[0] = 0xAA;
  53.234 -    mac[1] = 0xFF;
  53.235 -    mac[2] = (unsigned char)((val >> 24) & 0xff);
  53.236 -    mac[3] = (unsigned char)((val >> 16) & 0xff);
  53.237 -    mac[4] = (unsigned char)((val >>  8) & 0xff);
  53.238 -    mac[5] = (unsigned char)((val      ) & 0xff);
  53.239 -    val++;
  53.240 -}
  53.241 +#endif
  53.242  
  53.243  int vnet_device_mac(const char *device, unsigned char *mac){
  53.244      int err;
  53.245 @@ -253,97 +195,98 @@ int vnet_device_mac(const char *device, 
  53.246  }
  53.247  
  53.248  void vnet_dev_mac(unsigned char *mac){
  53.249 -    const char *devices[] = { "eth0", "eth1", "eth2", NULL };
  53.250 -    const char **pdev;
  53.251 -    int err = -ENODEV;
  53.252 -
  53.253 -    for(pdev = devices; err && *pdev; pdev++){
  53.254 -        err = vnet_device_mac(*pdev, mac);
  53.255 -    }
  53.256 -    if(err){
  53.257 -        vnet_default_mac(mac);
  53.258 -    }
  53.259 +    mac[0] = 0xAA;
  53.260 +    mac[1] = 0xFF;
  53.261 +    get_random_bytes(mac + 2, 4);
  53.262  }
  53.263  
  53.264 -static int vnet_dev_init(struct net_device *dev){
  53.265 -    int err = 0;
  53.266 -    Vnet *vnet = (void*)dev->priv;
  53.267 - 
  53.268 -    dprintf(">\n");
  53.269 +/** Initial setup of the device for a vnet.
  53.270 + */
  53.271 +static void vnet_dev_init(struct net_device *dev){
  53.272      ether_setup(dev);
  53.273  
  53.274 +#if 0
  53.275      if(!eth_hard_header){
  53.276          eth_hard_header = dev->hard_header;
  53.277      }
  53.278      dev->hard_header          = vnet_dev_hard_header;
  53.279 +    //dev->do_ioctl             = vnet_dev_do_ioctl;
  53.280 +    //dev->tx_timeout           = vnet_dev_tx_timeout;
  53.281 +    //dev->watchdog_timeo       = TX_TIMEOUT;
  53.282 +    
  53.283 +#endif
  53.284  
  53.285      dev->open                 = vnet_dev_open;
  53.286      dev->stop                 = vnet_dev_stop;
  53.287 -    dev->uninit               = vnet_dev_uninit;
  53.288      dev->destructor           = vnet_dev_destructor;
  53.289      dev->hard_start_xmit      = vnet_dev_hard_start_xmit;
  53.290      dev->get_stats            = vnet_dev_get_stats;
  53.291 -    dev->do_ioctl             = vnet_dev_do_ioctl;
  53.292      dev->change_mtu           = vnet_dev_change_mtu;
  53.293 -
  53.294 -    dev->tx_timeout           = vnet_dev_tx_timeout;
  53.295 -    dev->watchdog_timeo       = TX_TIMEOUT;
  53.296      dev->set_multicast_list   = vnet_dev_set_multicast_list;
  53.297 -    
  53.298 -    dev->hard_header_len      += vnet->header_n;
  53.299 -    dev->mtu                  -= vnet->header_n;
  53.300 -
  53.301 -    vnet_dev_mac(dev->dev_addr);
  53.302  
  53.303      dev->flags |= IFF_DEBUG;
  53.304      dev->flags |= IFF_PROMISC;
  53.305      dev->flags |= IFF_ALLMULTI;
  53.306  
  53.307 -    dprintf("<\n");
  53.308 +    vnet_dev_mac(dev->dev_addr);
  53.309 +}
  53.310 +
  53.311 +/** Complete the setup of the device for a vnet.
  53.312 + * Associate the device and the vnet and set mtu etc.
  53.313 + */
  53.314 +static int vnet_dev_setup(Vnet *vnet, struct net_device *dev){
  53.315 +    int err;
  53.316 +
  53.317 +    Vnet_incref(vnet);
  53.318 +    dev->priv = vnet;
  53.319 +    vnet->dev = dev;
  53.320 +    dev->hard_header_len += vnet->header_n;
  53.321 +    if(!etherip_in_udp){
  53.322 +        dev->mtu -= vnet->header_n;
  53.323 +    }
  53.324 +    vnet->mtu = dev->mtu;
  53.325 +    iprintf("> Adding vnet device %s\n", dev->name);
  53.326 +    err = register_netdev(dev);
  53.327 +    if(err){
  53.328 +        eprintf("> register_netdev(%s) = %d\n", dev->name, err);
  53.329 +        vnet_dev_destructor(dev);
  53.330 +    }
  53.331      return err;
  53.332  }
  53.333  
  53.334 +static inline int roundup(int n, int k){
  53.335 +    return k * ((n + k - 1) / k);
  53.336 +}
  53.337 +
  53.338  /** Add the interface (net device) for a vnet.
  53.339   * Sets the dev field of the vnet on success.
  53.340 - * Does nothing if the vif already has an interface.
  53.341 + * Does nothing if the vnet already has an interface.
  53.342   *
  53.343 - * @param vif vif
  53.344 + * @param vnet vnet
  53.345   * @return 0 on success, error code otherwise
  53.346   */
  53.347  int vnet_dev_add(Vnet *vnet){
  53.348      int err = 0;
  53.349      struct net_device *dev = NULL;
  53.350  
  53.351 -    dprintf("> vnet=%p\n", vnet);
  53.352      if(vnet->dev) goto exit;
  53.353 -    vnet->header_n = sizeof(struct iphdr) + sizeof(struct etheriphdr);
  53.354 -    dev = kmalloc(sizeof(struct net_device), GFP_ATOMIC);
  53.355 +    vnet->header_n = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct etheriphdr);
  53.356 +    if(etherip_in_udp){
  53.357 +        vnet->header_n += sizeof(struct VnetMsgHdr);
  53.358 +        vnet->header_n += sizeof(struct udphdr);
  53.359 +    }
  53.360 +    vnet->header_n = roundup(vnet->header_n, 4);
  53.361 +    dev = alloc_netdev(0, vnet->device, vnet_dev_init);
  53.362      if(!dev){
  53.363          err = -ENOMEM;
  53.364          goto exit;
  53.365      }
  53.366 -    *dev = (struct net_device){};
  53.367 -    dev->priv = vnet;
  53.368 -    vnet->dev = dev;
  53.369 -
  53.370 -    err = vnet_dev_set_name(dev);
  53.371 -    if(err) goto exit;
  53.372 -    vnet_dev_init(dev);
  53.373 -    err = register_netdev(dev);
  53.374 -    if(err){
  53.375 -        wprintf("> register_netdev(%s) = %d\n", dev->name, err);
  53.376 -    }
  53.377 +    err = vnet_dev_setup(vnet, dev);
  53.378      if(err) goto exit;
  53.379      rtnl_lock();
  53.380      dev_open(dev);
  53.381      rtnl_unlock();
  53.382  
  53.383 -    //dev_hold(dev);
  53.384    exit:
  53.385 -    if(err){
  53.386 -        if(dev) kfree(dev);
  53.387 -        vnet->dev = NULL;
  53.388 -    }
  53.389 -    dprintf("< err=%d\n", err);
  53.390      return err;
  53.391  }
    54.1 --- a/tools/vnet/vnet-module/vnet_dev.h	Thu Feb 09 16:09:00 2006 +0100
    54.2 +++ b/tools/vnet/vnet-module/vnet_dev.h	Thu Feb 09 16:12:11 2006 +0100
    54.3 @@ -1,5 +1,5 @@
    54.4  /*
    54.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    54.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    54.7   *
    54.8   * This program is free software; you can redistribute it and/or modify
    54.9   * it under the terms of the GNU General Public License as published by the 
   54.10 @@ -23,6 +23,5 @@ struct Vnet;
   54.11  
   54.12  extern int vnet_dev_add(struct Vnet *vnet);
   54.13  extern void vnet_dev_remove(struct Vnet *vnet);
   54.14 -extern int Vnet_create(struct Vnet *info);
   54.15  
   54.16  #endif
    55.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    55.2 +++ b/tools/vnet/vnet-module/vnet_eval.c	Thu Feb 09 16:12:11 2006 +0100
    55.3 @@ -0,0 +1,378 @@
    55.4 +/*
    55.5 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    55.6 + *
    55.7 + * This program is free software; you can redistribute it and/or modify
    55.8 + * it under the terms of the GNU General Public License as published by the 
    55.9 + * Free Software Foundation; either version 2 of the License, or (at your
   55.10 + * option) any later version.
   55.11 + * 
   55.12 + * This program is distributed in the hope that it will be useful, but
   55.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   55.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   55.15 + * for more details.
   55.16 + *
   55.17 + * You should have received a copy of the GNU General Public License along
   55.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   55.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   55.20 + *
   55.21 + */
   55.22 +
   55.23 +#ifdef __KERNEL__
   55.24 +
   55.25 +#include <linux/config.h>
   55.26 +#include <linux/module.h>
   55.27 +#include <linux/types.h>
   55.28 +#include <linux/kernel.h>
   55.29 +#include <linux/version.h>
   55.30 +#include <linux/errno.h>
   55.31 +
   55.32 +#else 
   55.33 +
   55.34 +#include "sys_kernel.h"
   55.35 +#include "spinlock.h"
   55.36 +
   55.37 +#include <sys/types.h>
   55.38 +#include <sys/socket.h>
   55.39 +#include <netinet/in.h>
   55.40 +#include <arpa/inet.h>
   55.41 +
   55.42 +#endif
   55.43 +
   55.44 +#include "vnet.h"
   55.45 +#include "varp.h"
   55.46 +#include "vif.h"
   55.47 +#include "vnet_forward.h"
   55.48 +#include "sa.h"
   55.49 +
   55.50 +#include "iostream.h"
   55.51 +
   55.52 +#ifdef __KERNEL__
   55.53 +#include "kernel_stream.h"
   55.54 +#else
   55.55 +#include "file_stream.h"
   55.56 +#endif
   55.57 +
   55.58 +#include "sxpr_util.h"
   55.59 +#include "vnet_eval.h"
   55.60 +
   55.61 +#define MODULE_NAME "VNET"
   55.62 +#define DEBUG 1
   55.63 +#undef DEBUG
   55.64 +#include "debug.h"
   55.65 +
   55.66 +/** Create a vnet.
   55.67 + * It is an error if a vnet with the same id exists.
   55.68 + *
   55.69 + * @param vnet vnet id
   55.70 + * @param device vnet device name
   55.71 + * @param security security level
   55.72 + * @return 0 on success, error code otherwise
   55.73 + */
   55.74 +static int ctrl_vnet_add(VnetId *vnet, char *device, int security){
   55.75 +    int err = 0;
   55.76 +    Vnet *vnetinfo = NULL;
   55.77 +
   55.78 +    if(strlen(device) >= IFNAMSIZ){
   55.79 +        err = -EINVAL;
   55.80 +        goto exit;
   55.81 +    }
   55.82 +    if(Vnet_lookup(vnet, NULL) == 0){
   55.83 +        err = -EEXIST;
   55.84 +        goto exit;
   55.85 +    }
   55.86 +    err = Vnet_alloc(&vnetinfo);
   55.87 +    if(err) goto exit;
   55.88 +    vnetinfo->vnet = *vnet;
   55.89 +    vnetinfo->security = security;
   55.90 +    strcpy(vnetinfo->device, device);
   55.91 +    err = Vnet_create(vnetinfo);
   55.92 +  exit:
   55.93 +    if(vnetinfo) Vnet_decref(vnetinfo);
   55.94 +    return err;
   55.95 +}
   55.96 +
   55.97 +/** Create an entry for a vif with the given vnet and vmac.
   55.98 + *
   55.99 + * @param vnet vnet id
  55.100 + * @param vmac mac address
  55.101 + * @return 0 on success, error code otherwise
  55.102 + */
  55.103 +static int ctrl_vif_add(VnetId *vnet, Vmac *vmac){
  55.104 +    int err = 0;
  55.105 +    Vif *vif = NULL;
  55.106 +
  55.107 +    err = Vnet_lookup(vnet, NULL);
  55.108 +    if(err) goto exit;
  55.109 +    err = vif_create(vnet, vmac, 0, &vif);
  55.110 +  exit:
  55.111 +    if(vif) vif_decref(vif);
  55.112 +    return err;
  55.113 +}
  55.114 +
  55.115 +/** Delete a vif.
  55.116 + *
  55.117 + * @param vnet vnet id
  55.118 + * @param vmac mac address
  55.119 + * @return 0 on success, error code otherwise
  55.120 + */
  55.121 +static int ctrl_vif_del(VnetId *vnet, Vmac *vmac){
  55.122 +    int err = 0;
  55.123 +    Vif *vif = NULL;
  55.124 +
  55.125 +    err = Vnet_lookup(vnet, NULL);
  55.126 +    if(err) goto exit;
  55.127 +    err = vif_lookup(vnet, vmac, &vif);
  55.128 +    if(err) goto exit;
  55.129 +    vif_remove(vnet, vmac);
  55.130 +  exit:
  55.131 +    if(vif) vif_decref(vif);
  55.132 +    return err;
  55.133 +}
  55.134 +
  55.135 +/** (varp.print)
  55.136 + */
  55.137 +static int eval_varp_print(Sxpr exp, IOStream *out, void *data){
  55.138 +    int err = 0;
  55.139 +    vnet_print(out);
  55.140 +    vif_print(out);
  55.141 +    varp_print(out);
  55.142 +    return err;
  55.143 +}
  55.144 +
  55.145 +static int eval_varp_list(Sxpr exp, IOStream *out, void *data){
  55.146 +    int err = 0;
  55.147 +    varp_print(out);
  55.148 +    return err;
  55.149 +}
  55.150 +
  55.151 +/** (varp.mcaddr (addr <addr>))
  55.152 + */
  55.153 +static int eval_varp_mcaddr(Sxpr exp, IOStream *out, void *data){
  55.154 +    int err =0;
  55.155 +    Sxpr oaddr = intern("addr");
  55.156 +    uint32_t addr;
  55.157 +
  55.158 +    err = child_addr(exp, oaddr, &addr);
  55.159 +    if(err < 0) goto exit;
  55.160 +    varp_set_mcast_addr(addr);
  55.161 +  exit:
  55.162 +    return err;
  55.163 +}
  55.164 +
  55.165 +/** (varp.flush)
  55.166 + */
  55.167 +static int eval_varp_flush(Sxpr exp, IOStream *out, void *data){
  55.168 +    int err = 0;
  55.169 +    varp_flush();
  55.170 +    return err;
  55.171 +}
  55.172 +
  55.173 +/** (vnet.add (id <id>)
  55.174 + *            [(vnetif <name>)]
  55.175 + *            [(security { none | auth | conf } )]
  55.176 + *  )
  55.177 + */
  55.178 +int eval_vnet_add(Sxpr exp, IOStream *out, void *data){
  55.179 +    int err = 0;
  55.180 +    Sxpr oid = intern("id");
  55.181 +    Sxpr osecurity = intern("security");
  55.182 +    Sxpr ovnetif = intern("vnetif");
  55.183 +    Sxpr csecurity;
  55.184 +    VnetId vnet = {};
  55.185 +    char *device = NULL;
  55.186 +    char dev[IFNAMSIZ] = {};
  55.187 +    char *security = NULL;
  55.188 +    int sec;
  55.189 +
  55.190 +    err = child_vnet(exp, oid, &vnet);
  55.191 +    if(err) goto exit;
  55.192 +    child_string(exp, ovnetif, &device);
  55.193 +    if(!device){
  55.194 +        snprintf(dev, IFNAMSIZ-1, "vnif%04x", ntohs(vnet.u.vnet16[7]));
  55.195 +        device = dev;
  55.196 +    }
  55.197 +    csecurity = sxpr_child_value(exp, osecurity, intern("none"));
  55.198 +    err = stringof(csecurity, &security);
  55.199 +    if(err) goto exit;
  55.200 +    if(strcmp(security, "none")==0){
  55.201 +        sec = 0;
  55.202 +    } else if(strcmp(security, "auth")==0){
  55.203 +        sec = SA_AUTH;
  55.204 +    } else if(strcmp(security, "conf")==0){
  55.205 +        sec = SA_CONF;
  55.206 +    } else {
  55.207 +        err = -EINVAL;
  55.208 +        goto exit;
  55.209 +    }
  55.210 +    err = ctrl_vnet_add(&vnet, device, sec);
  55.211 + exit:
  55.212 +    return err;
  55.213 +}
  55.214 +
  55.215 +/** Delete a vnet.
  55.216 + *
  55.217 + * (vnet.del (id <id>))
  55.218 + *
  55.219 + * @param exp request containing the vnet id
  55.220 + * @return 0 on success, error code otherwise
  55.221 + */
  55.222 +static int eval_vnet_del(Sxpr exp, IOStream *out, void *data){
  55.223 +    int err = 0;
  55.224 +    Sxpr oid = intern("id");
  55.225 +    VnetId vnet = {};
  55.226 +
  55.227 +    err = child_vnet(exp, oid, &vnet);
  55.228 +    if(err) goto exit;
  55.229 +    err = Vnet_del(&vnet);
  55.230 +  exit:
  55.231 +    return err;
  55.232 +}
  55.233 +
  55.234 +static int eval_vnet_list(Sxpr exp, IOStream *out, void *data){
  55.235 +    int err = 0;
  55.236 +    vnet_print(out);
  55.237 +    return err;
  55.238 +}
  55.239 +
  55.240 +/** (vif.add (vnet <vnet>) (vmac <macaddr>))
  55.241 + */
  55.242 +static int eval_vif_add(Sxpr exp, IOStream *out, void *data){
  55.243 +    int err = 0;
  55.244 +    Sxpr ovnet = intern("vnet");
  55.245 +    Sxpr ovmac = intern("vmac");
  55.246 +    VnetId vnet = {};
  55.247 +    Vmac vmac = {};
  55.248 +
  55.249 +    err = child_vnet(exp, ovnet, &vnet);
  55.250 +    if(err) goto exit;
  55.251 +    err = child_mac(exp, ovmac, vmac.mac);
  55.252 +    if(err) goto exit;
  55.253 +    err = ctrl_vif_add(&vnet, &vmac);
  55.254 +  exit:
  55.255 +    return err;
  55.256 +}
  55.257 +
  55.258 +/** (vif.del (vnet <vnet>) (vmac <macaddr>))
  55.259 + */
  55.260 +static int eval_vif_del(Sxpr exp, IOStream *out, void *data){
  55.261 +    int err = 0;
  55.262 +    Sxpr ovnet = intern("vnet");
  55.263 +    Sxpr ovmac = intern("vmac");
  55.264 +    VnetId vnet = {};
  55.265 +    Vmac vmac = {};
  55.266 +
  55.267 +    err = child_vnet(exp, ovnet, &vnet);
  55.268 +    if(err) goto exit;
  55.269 +    err = child_mac(exp, ovmac, vmac.mac);
  55.270 +    if(err) goto exit;
  55.271 +    err = ctrl_vif_del(&vnet, &vmac);
  55.272 +  exit:
  55.273 +    return err;
  55.274 +}
  55.275 +
  55.276 +static int eval_vif_list(Sxpr exp, IOStream *out, void *data){
  55.277 +    int err = 0;
  55.278 +    vif_print(out);
  55.279 +    return err;
  55.280 +}
  55.281 +
  55.282 +/** Eval a peer add request.
  55.283 + *
  55.284 + * (peer.add (addr <addr>) [(port <port>)])
  55.285 + *
  55.286 + * @param exp request
  55.287 + * @param out output stream
  55.288 + * @param data data
  55.289 + * @return 0 on success, error code otherwise
  55.290 + */
  55.291 +int eval_peer_add(Sxpr exp, IOStream *out, void *data){
  55.292 +    int err = 0;
  55.293 +    Sxpr oaddr = intern("addr");
  55.294 +    Sxpr oport = intern("port");
  55.295 +    VarpAddr addr = { .family = AF_INET };
  55.296 +    int port;
  55.297 +
  55.298 +    err = child_addr(exp, oaddr, &addr.u.ip4.s_addr);
  55.299 +    if(err < 0) goto exit;
  55.300 +    err = child_int(exp, oport, &port);
  55.301 +    if(err < 0){
  55.302 +        err = 0;
  55.303 +        port = varp_port;
  55.304 +    }
  55.305 +    if(err) goto exit;
  55.306 +    err = vnet_peer_add(&addr, port);
  55.307 +  exit:
  55.308 +    return err;
  55.309 +}
  55.310 +
  55.311 +/** Eval a peer delete request.
  55.312 + *
  55.313 + * (peer.del (addr <addr>))
  55.314 + *
  55.315 + * @param vnetd vnetd
  55.316 + * @param exp request
  55.317 + * @param out output stream
  55.318 + * @param data data
  55.319 + * @return 0 on success, error code otherwise
  55.320 + */
  55.321 +static int eval_peer_del(Sxpr exp, IOStream *out, void *data){
  55.322 +    int err = 0;
  55.323 +    Sxpr oaddr = intern("addr");
  55.324 +    VarpAddr addr = { .family = AF_INET };
  55.325 +
  55.326 +    err = child_addr(exp, oaddr, &addr.u.ip4.s_addr);
  55.327 +    if(err < 0) goto exit;
  55.328 +    err = vnet_peer_del(&addr);
  55.329 +  exit:
  55.330 +    return err;
  55.331 +}
  55.332 +
  55.333 +/** Eval a peer list request.
  55.334 + *
  55.335 + * (peer.list)
  55.336 + *
  55.337 + * @param exp request
  55.338 + * @param out output stream
  55.339 + * @param data data
  55.340 + * @return 0 on success, error code otherwise
  55.341 + */
  55.342 +static int eval_peer_list(Sxpr exp, IOStream *out, void *data){
  55.343 +    int err = 0;
  55.344 +    vnet_peer_print(out);
  55.345 +    return err;
  55.346 +}
  55.347 +
  55.348 +int vnet_eval_defs(SxprEval *defs, Sxpr exp, IOStream *io, void *data){
  55.349 +    int err = 0;
  55.350 +    SxprEval *def;
  55.351 +
  55.352 +    iprintf("> "); objprint(iostdout, exp, 0); IOStream_print(iostdout, "\n");
  55.353 +    err = -ENOSYS;
  55.354 +    for(def = defs; !NONEP(def->name); def++){
  55.355 +        if(sxpr_elementp(exp, def->name)){
  55.356 +            err = def->fn(exp, io, data);
  55.357 +            break;
  55.358 +        }
  55.359 +    }
  55.360 +    iprintf("< err=%d\n", err);
  55.361 +    return err;
  55.362 +}
  55.363 +
  55.364 +int vnet_eval(Sxpr exp, IOStream *io, void *data){
  55.365 +    SxprEval defs[] = {
  55.366 +        { .name = intern("peer.add"),     .fn = eval_peer_add     },
  55.367 +        { .name = intern("peer.del"),     .fn = eval_peer_del     },
  55.368 +        { .name = intern("peer.list"),    .fn = eval_peer_list    },
  55.369 +        { .name = intern("varp.flush"),   .fn = eval_varp_flush   },
  55.370 +        { .name = intern("varp.list"),    .fn = eval_varp_list    },
  55.371 +        { .name = intern("varp.mcaddr"),  .fn = eval_varp_mcaddr  },
  55.372 +        { .name = intern("varp.print"),   .fn = eval_varp_print   },
  55.373 +        { .name = intern("vif.add"),      .fn = eval_vif_add      },
  55.374 +        { .name = intern("vif.del"),      .fn = eval_vif_del      },
  55.375 +        { .name = intern("vif.list"),     .fn = eval_vif_list     },
  55.376 +        { .name = intern("vnet.add"),     .fn = eval_vnet_add     },
  55.377 +        { .name = intern("vnet.del"),     .fn = eval_vnet_del     },
  55.378 +        { .name = intern("vnet.list"),    .fn = eval_vnet_list    },
  55.379 +        { .name = ONONE, .fn = NULL } };
  55.380 +    return vnet_eval_defs(defs, exp, io, data);
  55.381 +}
    56.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    56.2 +++ b/tools/vnet/vnet-module/vnet_eval.h	Thu Feb 09 16:12:11 2006 +0100
    56.3 @@ -0,0 +1,35 @@
    56.4 +/*
    56.5 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    56.6 + *
    56.7 + * This program is free software; you can redistribute it and/or modify
    56.8 + * it under the terms of the GNU General Public License as published by the 
    56.9 + * Free Software Foundation; either version 2 of the License, or (at your
   56.10 + * option) any later version.
   56.11 + * 
   56.12 + * This program is distributed in the hope that it will be useful, but
   56.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   56.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   56.15 + * for more details.
   56.16 + *
   56.17 + * You should have received a copy of the GNU General Public License along
   56.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   56.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   56.20 + *
   56.21 + */
   56.22 +#ifndef _VNET_EVAL_H_
   56.23 +#define _VNET_EVAL_H_
   56.24 +
   56.25 +#include "sxpr.h"
   56.26 +struct IOStream;
   56.27 +
   56.28 +typedef struct SxprEval { /* One row of a dispatch table handed to vnet_eval_defs(). */
   56.29 +    Sxpr name; /* Operation name; the table in this changeset builds it with intern() and ends with ONONE. */
   56.30 +    int (*fn)(Sxpr, struct IOStream *, void *data); /* Handler stored for that name (NULL in the end marker). */
   56.31 +} SxprEval;
   56.32 +
   56.33 +extern int eval_peer_add(Sxpr exp, struct IOStream *out, void *data);
   56.34 +extern int eval_vnet_add(Sxpr exp, struct IOStream *out, void *data);
   56.35 +extern int vnet_eval_defs(SxprEval *defs, Sxpr exp, struct IOStream *out, void *data);
   56.36 +extern int vnet_eval(Sxpr exp, struct IOStream *out, void *data);
   56.37 +
   56.38 +#endif /* ! _VNET_EVAL_H_ */
    57.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    57.2 +++ b/tools/vnet/vnet-module/vnet_forward.c	Thu Feb 09 16:12:11 2006 +0100
    57.3 @@ -0,0 +1,383 @@
    57.4 +/*
    57.5 + * Copyright (C) 2005, 2006 Mike Wray <mike.wray@hp.com>
    57.6 + *
    57.7 + * This program is free software; you can redistribute it and/or modify
    57.8 + * it under the terms of the GNU General Public License as published by the 
    57.9 + * Free Software Foundation; either version 2 of the License, or (at your
   57.10 + * option) any later version.
   57.11 + * 
   57.12 + * This program is distributed in the hope that it will be useful, but
   57.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   57.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   57.15 + * for more details.
   57.16 + *
   57.17 + * You should have received a copy of the GNU General Public License along
   57.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   57.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   57.20 + *
   57.21 + */
   57.22 +#ifdef __KERNEL__
   57.23 +
   57.24 +#include <linux/config.h>
   57.25 +#include <linux/module.h>
   57.26 +#include <linux/types.h>
   57.27 +#include <linux/kernel.h>
   57.28 +#include <linux/init.h>
   57.29 +
   57.30 +#include <linux/version.h>
   57.31 +#include <linux/spinlock.h>
   57.32 +
   57.33 +#include <linux/skbuff.h>
   57.34 +#include <linux/net.h>
   57.35 +#include <linux/netdevice.h>
   57.36 +#include <linux/in.h>
   57.37 +#include <linux/inet.h>
   57.38 +#include <linux/netfilter_bridge.h>
   57.39 +#include <linux/netfilter_ipv4.h>
   57.40 +#include <linux/udp.h>
   57.41 +
   57.42 +#include <net/ip.h>
   57.43 +#include <net/protocol.h>
   57.44 +#include <net/route.h>
   57.45 +#include <net/checksum.h>
   57.46 +
   57.47 +#else
   57.48 +
   57.49 +#include <netinet/in.h>
   57.50 +#include <arpa/inet.h>
   57.51 +
   57.52 +#include "sys_kernel.h"
   57.53 +#include "spinlock.h"
   57.54 +#include "skbuff.h"
   57.55 +#include <linux/ip.h>
   57.56 +#include <linux/udp.h>
   57.57 +
   57.58 +#endif
   57.59 +
   57.60 +#include <varp.h>
   57.61 +#include <if_varp.h>
   57.62 +#include <varp.h>
   57.63 +#include <skb_util.h>
   57.64 +#include <skb_context.h>
   57.65 +
   57.66 +#include "allocate.h"
   57.67 +#include "iostream.h"
   57.68 +#include "hash_table.h"
   57.69 +#include "vnet_forward.h"
   57.70 +
   57.71 +#define MODULE_NAME "VNET"
   57.72 +#define DEBUG 1
   57.73 +#undef DEBUG
   57.74 +#include "debug.h"
   57.75 +
   57.76 +extern int _skb_xmit(struct sk_buff *skb, uint32_t saddr);
   57.77 +
   57.78 +typedef struct VnetPeer { // One forwarding peer; stored in vnet_peer_table.
   57.79 +    struct VarpAddr addr; // Peer address; also the hash key (see vnet_peer_add()).
   57.80 +    uint16_t port; // Port given to vnet_peer_add(); VnetPeer_print() passes it through a byte-order conversion.
   57.81 +    atomic_t refcount; // Managed by VnetPeer_incref()/VnetPeer_decref(); the peer is freed when it drops to zero.
   57.82 +    int tx_packets; // Incremented by VnetPeer_forward().
   57.83 +    int rx_packets; // Incremented by vnet_forward_recv().
   57.84 +} VnetPeer;
   57.85 +// Peer table (created by vnet_forward_init()) and the rwlock taken around accesses to it.
   57.86 +static HashTable *vnet_peer_table = NULL;
   57.87 +static rwlock_t vnet_peer_table_lock = RW_LOCK_UNLOCKED;
   57.88 +// Lock helpers: thin wrappers over the irqsave/irqrestore rwlock calls; `flags` holds the saved state.
   57.89 +#define vnet_peer_read_lock(flags)    read_lock_irqsave(&vnet_peer_table_lock, (flags))
   57.90 +#define vnet_peer_read_unlock(flags)  read_unlock_irqrestore(&vnet_peer_table_lock, (flags))
   57.91 +#define vnet_peer_write_lock(flags)   write_lock_irqsave(&vnet_peer_table_lock, (flags))
   57.92 +#define vnet_peer_write_unlock(flags) write_unlock_irqrestore(&vnet_peer_table_lock, (flags))
   57.93 +
   57.94 +static void VnetPeer_decref(VnetPeer *peer){
   57.95 +    // Drop one reference (a NULL peer is ignored); free the peer
   57.96 +    // once atomic_dec_and_test() reports that the count reached zero.
   57.97 +    if(peer && atomic_dec_and_test(&peer->refcount))
   57.98 +        kfree(peer);
   57.99 +}
  57.100 +
  57.101 +static void VnetPeer_incref(VnetPeer *peer){
  57.102 +    // Take one more reference on peer; a NULL peer is a no-op.
  57.103 +    if(peer) atomic_inc(&peer->refcount);
  57.104 +}
  57.105 +
  57.106 +static void VnetPeer_print(VnetPeer *peer, IOStream *io){
  57.107 +    char addrbuf[VARP_ADDR_BUF];
  57.108 +    // Write peer to io as a "(vnet_peer ...)" s-expression: address, port and both packet counters.
  57.109 +    IOStream_print(io, "(vnet_peer\n");
  57.110 +    IOStream_print(io, "  (addr %s)\n", VarpAddr_ntoa(&peer->addr, addrbuf));
  57.111 +    IOStream_print(io, "  (port %d)\n", ntohs(peer->port)); // network -> host order for display
  57.112 +    IOStream_print(io, "  (tx_packets %d)\n", peer->tx_packets);
  57.113 +    IOStream_print(io, "  (rx_packets %d)\n", peer->rx_packets); // the receive counter, not tx_packets
  57.114 +    IOStream_print(io, ")\n");
  57.115 +}
  57.116 +// Wrap a reallocated copy of fwdskb in vnet-forward, UDP and IP headers and transmit it to peer.
  57.117 +static int VnetPeer_forward(VnetPeer *peer, struct sk_buff *fwdskb){
  57.118 +    int err = 0;
  57.119 +    const int ip_n = sizeof(struct iphdr);
  57.120 +    const int udp_n = sizeof(struct udphdr);
  57.121 +    const int vnet_n = sizeof(struct VnetMsgHdr);
  57.122 +    int head_n = 16 + ip_n + udp_n + vnet_n;
  57.123 +    int push_n = 0;
  57.124 +    struct sk_buff *skb = NULL;
  57.125 +    struct VnetMsgHdr *vhdr;
  57.126 +    uint32_t saddr = 0;
  57.127 +    uint16_t sport = varp_port;
  57.128 +    uint32_t daddr = peer->addr.u.ip4.s_addr;
  57.129 +    uint16_t dport = varp_port; // NOTE(review): peer->port is not consulted here - confirm intended.
  57.130 +    // Returns 0 when skipped (no skb, or it came from this peer), -ENOMEM, else the _skb_xmit() result.
  57.131 +    if(!fwdskb) goto exit;
  57.132 +    if(daddr == fwdskb->nh.iph->saddr){
  57.133 +        // Don't forward if the skb src addr is the peer addr.
  57.134 +        dprintf("> Forward loop on " IPFMT "\n", NIPQUAD(daddr));
  57.135 +        goto exit;
  57.136 +    }
  57.137 +    // On entry fwdskb->data should be at fwdskb->nh.raw (adjust if not).
  57.138 +    // Also fwdskb->h.raw and fwdskb->nh.raw are set.
  57.139 +    if(fwdskb->data > fwdskb->nh.raw){
  57.140 +        push_n = fwdskb->data - fwdskb->nh.raw;
  57.141 +        head_n += push_n;
  57.142 +    }
  57.143 +    // If has headroom, copies header (which incs ref on dst),
  57.144 +    // otherwise only clones header, which does not inc ref on dst.
  57.145 +    skb = skb_realloc_headroom(fwdskb, head_n);
  57.146 +    //skb = skb_copy_expand(fwdskb, head_n, 0, GFP_ATOMIC);
  57.147 +    if(!skb){
  57.148 +        err = -ENOMEM;
  57.149 +        goto exit;
  57.150 +    }
  57.151 +
  57.152 +    if(push_n){
  57.153 +        skb_push(skb, push_n);
  57.154 +    }
  57.155 +
  57.156 +#ifdef DEBUG
  57.157 +    printk("\nOriginal packet:\n");
  57.158 +    print_iphdr(__FUNCTION__, skb);
  57.159 +    skb_print_bits(__FUNCTION__, skb, 0, skb->len);
  57.160 +#endif
  57.161 +
  57.162 +    skb->mac.raw = NULL;
  57.163 +    vhdr = (void*)skb_push(skb, vnet_n);
  57.164 +    vhdr->id       = htons(VFWD_ID);
  57.165 +    vhdr->opcode   = 0;
  57.166 +
  57.167 +    // Setup the UDP header.
  57.168 +    skb->h.raw = skb_push(skb, udp_n);
  57.169 +    skb->h.uh->source = sport;                  // Source port.
  57.170 +    skb->h.uh->dest   = dport;                  // Destination port.
  57.171 +    skb->h.uh->len    = htons(skb->len);        // Total packet length (bytes).
  57.172 +    skb->h.uh->check  = 0;
  57.173 +
  57.174 +    // Setup the IP header.
  57.175 +    skb->nh.raw = skb_push(skb, ip_n);
  57.176 +    skb->nh.iph->version  = 4;                  // Standard version.
  57.177 +    skb->nh.iph->ihl      = ip_n / 4;           // IP header length (32-bit words).
  57.178 +    skb->nh.iph->tos      = 0;                  // No special type-of-service.
  57.179 +    skb->nh.iph->tot_len  = htons(skb->len);    // Total packet length (bytes).
  57.180 +    skb->nh.iph->id       = 0;                  // No flow id.
  57.181 +    skb->nh.iph->protocol = IPPROTO_UDP;        // IP protocol number.
  57.182 +    skb->nh.iph->frag_off = 0;
  57.183 +    skb->nh.iph->ttl      = 64;                 // Linux default time-to-live.
  57.184 +    skb->nh.iph->saddr    = saddr;              // Source address.
  57.185 +    skb->nh.iph->daddr    = daddr;              // Destination address.
  57.186 +    skb->nh.iph->check    = 0;
  57.187 +
  57.188 +#ifdef DEBUG
  57.189 +    printk("\nWrapped packet:\n");
  57.190 +    print_iphdr(__FUNCTION__, skb);
  57.191 +    print_udphdr(__FUNCTION__, skb);
  57.192 +    skb_print_bits(__FUNCTION__, skb, 0, 0 * skb->len);
  57.193 +#endif
  57.194 +
  57.195 +    err = _skb_xmit(skb, saddr);
  57.196 +    peer->tx_packets++;
  57.197 +
  57.198 +  exit:
  57.199 +    if(err < 0 && skb) kfree_skb(skb); // skb is still NULL when skb_realloc_headroom() failed
  57.200 +    return err;
  57.201 +}
  57.202 +
  57.203 +int vnet_peer_get(VarpAddr *addr, VnetPeer **peer){
  57.204 +    // Look addr up under the read lock. A hit is stored in *peer with one extra
  57.205 +    // reference and 0 is returned; a miss stores NULL and returns -ENOENT.
  57.206 +    unsigned long irqflags;
  57.207 +    vnet_peer_read_lock(irqflags);
  57.208 +    *peer = HashTable_get(vnet_peer_table, addr);
  57.209 +    if(*peer) atomic_inc(&(*peer)->refcount);
  57.210 +    vnet_peer_read_unlock(irqflags);
  57.211 +    return (*peer == NULL) ? -ENOENT : 0; }
  57.212 +// Register addr/port as a forwarding peer. Returns 0 (also when addr is already present) or -ENOMEM.
  57.213 +int vnet_peer_add(VarpAddr *addr, uint16_t port){
  57.214 +    int err = 0;
  57.215 +    unsigned long flags;
  57.216 +    VnetPeer *peer;
  57.217 +
  57.218 +    vnet_peer_write_lock(flags);
  57.219 +    peer = HashTable_get(vnet_peer_table, addr);
  57.220 +    if(peer){
  57.221 +        // Already registered: no handle is returned to the caller, so no extra reference is taken.
  57.222 +        goto exit;
  57.223 +    }
  57.224 +    peer = ALLOCATE(VnetPeer);
  57.225 +    if(!peer){
  57.226 +        err = -ENOMEM;
  57.227 +        goto exit;
  57.228 +    }
  57.229 +    peer->addr = *addr;
  57.230 +    peer->port = port;
  57.231 +    VnetPeer_incref(peer); // the reference owned by the table entry
  57.232 +    if(!HashTable_add(vnet_peer_table, &peer->addr, peer)){
  57.233 +        VnetPeer_decref(peer); // presumably frees the peer (assumes ALLOCATE() zeroed refcount - TODO confirm)
  57.234 +        err = -ENOMEM;
  57.235 +    }
  57.236 +  exit:
  57.237 +    vnet_peer_write_unlock(flags);
  57.238 +    return err;
  57.239 +}
  57.240 +
  57.241 +int vnet_peer_del(VarpAddr *addr){
  57.242 +    // Remove the entry keyed by addr under the write lock; returns what HashTable_remove() returned.
  57.243 +    unsigned long irqflags;
  57.244 +    int removed;
  57.245 +    vnet_peer_write_lock(irqflags);
  57.246 +    removed = HashTable_remove(vnet_peer_table, addr);
  57.247 +    vnet_peer_write_unlock(irqflags);
  57.248 +    return removed;
  57.249 +}
  57.250 +
  57.251 +void vnet_peer_print(IOStream *io){
  57.252 +    // Print every peer in the table to io under the read lock; a missing table prints nothing.
  57.253 +    HashTable_for_decl(entry);
  57.254 +    unsigned long irqflags;
  57.255 +    if(vnet_peer_table){
  57.256 +        vnet_peer_read_lock(irqflags);
  57.257 +        HashTable_for_each(entry, vnet_peer_table){
  57.258 +            VnetPeer_print((VnetPeer *)entry->value, io);
  57.259 +        }
  57.260 +        vnet_peer_read_unlock(irqflags);
  57.261 +    }
  57.262 +}
  57.263 +
  57.264 +int vnet_forward_send(struct sk_buff *skb){
  57.265 +    // Offer skb to every peer in the table via VnetPeer_forward(),
  57.266 +    // holding the table read lock for the whole walk.
  57.267 +    // Per-peer results are not collected: the return value is
  57.268 +    // always 0, also when the peer table has not been created.
  57.269 +    int err = 0;
  57.270 +    unsigned long irqflags;
  57.271 +    HashTable_for_decl(entry);
  57.272 +
  57.273 +    if(vnet_peer_table){
  57.274 +        vnet_peer_read_lock(irqflags);
  57.275 +        HashTable_for_each(entry, vnet_peer_table){
  57.276 +            VnetPeer_forward((VnetPeer *)entry->value, skb);
  57.277 +        }
  57.278 +        vnet_peer_read_unlock(irqflags);
  57.279 +    }
  57.280 +
  57.281 +    return err;
  57.282 +}
  57.283 +// Unwrap a packet forwarded by a known peer, handle a copy locally and re-transmit the original.
  57.284 +int vnet_forward_recv(struct sk_buff *skb){
  57.285 +    int err = 0;
  57.286 +    VarpAddr addr = { .family = AF_INET };
  57.287 +    VnetPeer *peer = NULL;
  57.288 +    unsigned char eth[ETH_HLEN] = {};
  57.289 +    struct sk_buff *recvskb;
  57.290 +    // Returns 1 when _skb_xmit() succeeds, -ENOSYS without a peer table, else a negative error.
  57.291 +    if(!vnet_peer_table){
  57.292 +        dprintf("> no table\n");
  57.293 +        return -ENOSYS;
  57.294 +    }
  57.295 +    // On entry mac.raw, h.raw, nh.raw are set.
  57.296 +    // skb->data points after the fwd vnet header, at the complete
  57.297 +    // forwarded packet (which has IP hdr, no eth hdr).
  57.298 +
  57.299 +    // Save the eth hdr and source addr (peer).
  57.300 +    memcpy(eth, skb->mac.raw, ETH_HLEN);
  57.301 +    addr.u.ip4.s_addr = skb->nh.iph->saddr;
  57.302 +    err = vnet_peer_get(&addr, &peer);
  57.303 +    if(err){
  57.304 +        wprintf("> no peer for " IPFMT "\n", NIPQUAD(skb->nh.iph->saddr));
  57.305 +        goto exit;
  57.306 +    }
  57.307 +    peer->rx_packets++;
  57.308 +    skb->mac.raw = NULL;
  57.309 +    skb->nh.raw = skb->data;
  57.310 +    skb->h.raw = (void*)(skb->nh.iph + 1);
  57.311 +    if(!skb->nh.iph->saddr){
  57.312 +        skb->nh.iph->saddr = addr.u.ip4.s_addr;
  57.313 +    }
  57.314 +#ifdef __KERNEL__
  57.315 +    // Fix IP options, checksum, skb dst, netfilter state.
  57.316 +    memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
  57.317 +    skb->dev = NULL;
  57.318 +    dst_release(skb->dst);
  57.319 +    skb->dst = NULL;
  57.320 +    nf_reset(skb);
  57.321 +#endif // __KERNEL__
  57.322 +
  57.323 +    skb->mac.raw = skb->nh.raw - ETH_HLEN;
  57.324 +    memcpy(skb->mac.raw, eth, ETH_HLEN);
  57.325 +
  57.326 +    // Map destination mcast addresses to our mcast address.
  57.327 +    if(MULTICAST(skb->nh.iph->daddr)){
  57.328 +        skb->nh.iph->daddr = varp_mcast_addr;
  57.329 +        //xmit does this: ip_eth_mc_map(varp_mcast_addr, eth_hdr(skb)->h_dest);
  57.330 +    }
  57.331 +
  57.332 +    // Handle (a copy of) it ourselves, because
  57.333 +    // if it is looped-back by xmit it will be ignored.
  57.334 +    recvskb = pskb_copy(skb, GFP_ATOMIC);
  57.335 +    if(recvskb){
  57.336 +        // Data points at the unwrapped iphdr, but varp_handle_message()
  57.337 +        // expects it to point at the udphdr, so pull.
  57.338 +        skb_pull(recvskb, sizeof(struct iphdr));
  57.339 +        if(varp_handle_message(recvskb) <= 0){
  57.340 +            kfree_skb(recvskb);
  57.341 +        }
  57.342 +    }
  57.343 +    err = _skb_xmit(skb, skb->nh.iph->saddr);
  57.344 +    if(err >= 0) err = 1;
  57.345 +  exit:
  57.346 +    VnetPeer_decref(peer); // balances vnet_peer_get(); a no-op when the lookup failed (peer is NULL)
  57.347 +    return err;
  57.348 +}
  57.349 +
  57.350 +/** Hash a peer-table key: the raw bytes of the VarpAddr at k go to hash_hvoid(). */
  57.351 +static Hashcode peer_key_hash_fn(void *k){
  57.352 +    const int key_n = sizeof(struct VarpAddr);
  57.353 +    return hash_hvoid(0, k, key_n);
  57.354 +}
  57.355 +
  57.356 +/** Peer-table keys are equal when their VarpAddr bytes compare equal under memcmp(). */
  57.357 +static int peer_key_equal_fn(void *k1, void *k2){
  57.358 +    const int differs = memcmp(k1, k2, sizeof(struct VarpAddr));
  57.359 +    return !differs;
  57.360 +}
  57.361 +
  57.362 +static void peer_entry_free_fn(HashTable *table, HTEntry *entry){ // installed as the table's entry_free_fn
  57.363 +    if(entry == NULL) return; // nothing to release
  57.364 +    VnetPeer_decref((VnetPeer *)entry->value); // drop the reference taken when the peer was added
  57.365 +    HTEntry_free(entry); // then release the entry itself
  57.366 +}
  57.367 +
  57.368 +int vnet_forward_init(void){
  57.369 +    // Create the peer table on first use and install its key-compare,
  57.370 +    // key-hash and entry-free callbacks. Returns 0 (also when the table
  57.371 +    // already exists) or -ENOMEM when HashTable_new() fails.
  57.372 +    if(vnet_peer_table) return 0;
  57.373 +
  57.374 +    vnet_peer_table = HashTable_new(0);
  57.375 +    if(!vnet_peer_table) return -ENOMEM;
  57.376 +
  57.377 +    vnet_peer_table->key_equal_fn = peer_key_equal_fn;
  57.378 +    vnet_peer_table->key_hash_fn = peer_key_hash_fn;
  57.379 +    vnet_peer_table->entry_free_fn = peer_entry_free_fn;
  57.380 +    return 0;
  57.381 +}
  57.382 +
  57.383 +void vnet_forward_exit(void){ // Free the peer table and forget it.
  57.384 +    HashTable_free(vnet_peer_table);
  57.385 +    vnet_peer_table = NULL; // lets a later vnet_forward_init() build a fresh table
  57.386 +}
    58.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    58.2 +++ b/tools/vnet/vnet-module/vnet_forward.h	Thu Feb 09 16:12:11 2006 +0100
    58.3 @@ -0,0 +1,36 @@
    58.4 +/*
    58.5 + * Copyright (C) 2005, 2006 Mike Wray <mike.wray@hp.com>
    58.6 + *
    58.7 + * This program is free software; you can redistribute it and/or modify
    58.8 + * it under the terms of the GNU General Public License as published by the 
    58.9 + * Free Software Foundation; either version 2 of the License, or (at your
   58.10 + * option) any later version.
   58.11 + * 
   58.12 + * This program is distributed in the hope that it will be useful, but
   58.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   58.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   58.15 + * for more details.
   58.16 + *
   58.17 + * You should have received a copy of the GNU General Public License along
   58.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   58.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   58.20 + *
   58.21 + */
   58.22 +#ifndef _VNET_FORWARD_H_
   58.23 +#define _VNET_FORWARD_H_
   58.24 +
   58.25 +#include <if_varp.h>
   58.26 +
   58.27 +struct sk_buff;
   58.28 +struct IOStream;
   58.29 +
   58.30 +extern int vnet_peer_add(struct VarpAddr *addr, uint16_t port);
   58.31 +extern int vnet_peer_del(struct VarpAddr *addr);
   58.32 +extern void vnet_peer_print(struct IOStream *io);
   58.33 +
   58.34 +extern int vnet_forward_send(struct sk_buff *skb);
   58.35 +extern int vnet_forward_recv(struct sk_buff *skb);
   58.36 +extern int vnet_forward_init(void);
   58.37 +extern void vnet_forward_exit(void);
   58.38 +
   58.39 +#endif /* _VNET_FORWARD_H_ */
    59.1 --- a/tools/vnet/vnet-module/vnet_ioctl.c	Thu Feb 09 16:09:00 2006 +0100
    59.2 +++ b/tools/vnet/vnet-module/vnet_ioctl.c	Thu Feb 09 16:12:11 2006 +0100
    59.3 @@ -1,5 +1,5 @@
    59.4  /*
    59.5 - * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    59.6 + * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
    59.7   *
    59.8   * This program is free software; you can redistribute it and/or modify
    59.9   * it under the terms of the GNU General Public License as published by the 
   59.10 @@ -40,95 +40,25 @@
   59.11  #include "vnet.h"
   59.12  #include "varp.h"
   59.13  #include "vnet_dev.h"
   59.14 +#include "vnet_eval.h"
   59.15 +#include "vnet_forward.h"
   59.16  
   59.17 -#include "sxpr_parser.h"
   59.18  #include "iostream.h"
   59.19  #include "kernel_stream.h"
   59.20 +#include "mem_stream.h"
   59.21  #include "sys_string.h"
   59.22  #include "sys_net.h"
   59.23 +#include "sxpr_parser.h"
   59.24  
   59.25  #define MODULE_NAME "VNET"
   59.26  #define DEBUG 1
   59.27  #undef DEBUG
   59.28  #include "debug.h"
   59.29  
   59.30 -// Functions to manage vnets.
   59.31 -/*
   59.32 -
   59.33 -Have to rely on ethernet bridging being configured - but we can't rely
   59.34 -on the kernel interface being available to us (it's not exported @!$"%!).
   59.35 -
   59.36 -Create a vnet N:
   59.37 -- create the vnet device vnifN: using commands to /proc, kernel api
   59.38 -- create the vnet bridge vnetN: using brctl in user-space
   59.39 -- for best results something should keep track of the mapping vnet id <-> bridge name
   59.40 -
   59.41 -Add vif device vifD.N to vnet N.
   59.42 -- domain is configured with vifD.N on bridge vnetN
   59.43 -- vif script adds vif to bridge using brctl
   59.44 -- vif script detects that the bridge is a vnet bridge and
   59.45 -  uses /proc commands to configure the mac on the vnet
   59.46 -
   59.47 -Wouldn't be hard to add support for specifying vnet keys(s) in
   59.48 -the control interface.
   59.49 -
   59.50 -*/
   59.51 -
   59.52 -    // id         vnet id
   59.53 -    // security   security level
   59.54 -    // ciphersuite: digest, cipher, keys??
   59.55 -/* Security policy.
   59.56 -   vnet
   59.57 -   src: mac
   59.58 -   dst: mac
   59.59 -   coa: ip
   59.60 -   Map vnet x coa -> security (none, auth, conf)
   59.61 -
   59.62 -   Policy, e.g.
   59.63 -   - same subnet x vnet
   59.64 -   - diff subnet x vnet
   59.65 -   - some subnet x vnet
   59.66 -   - some host addr x vnet
   59.67 -
   59.68 -   (security (net local) (vnet *) (mode none))
   59.69 -   (security (net (not local))
   59.70 -
   59.71 -   (security (addr, vnet) (local-subnet addr)       none)
   59.72 -   (security (addr, vnet) (not (local-subnet addr)) conf)
   59.73 -   (security (addr, vnet) (host 15.144.27.80)
   59.74 -   (security (addr, vnet) (subnet addr 15.144.24.0/24) auth)
   59.75 -   (security (addr, vnet) t auth)
   59.76 -
   59.77 -   (security (addr local)         (mode none))
   59.78 -   (security (addr local/16)      (mode none))
   59.79 -   (security (addr 15.144.0.0/16) (mode auth))
   59.80 -   (security (addr 15.0.0.0/8)    (mode conf))
   59.81 -   (security (addr *)             (mode drop))
   59.82 -
   59.83 -   ?Varp security
   59.84 -   Use esp too - none, auth, conf,
   59.85 -   Varp sends broadcasts (requests) and unicasts (replies).
   59.86 -   Uses UDP. Could send over ESP if needed.
   59.87 -   For bcast don't know where it goes, so security has to be by vnet.
   59.88 -   For ucast know where it goes, so could do by vnet and addr.
   59.89 -
   59.90 -   Similar issue for vnets: know where unicast goes but don't know where
   59.91 -   bcast goes.
   59.92 -
   59.93 -   Simplify: 2 levels
   59.94 -   local ucast
   59.95 -   nonlocal ucast, mcast
   59.96 -
   59.97 -   (security (local none) (nonlocal conf))
   59.98 -   (security (local auth) (nonlocal conf))
   59.99 -
  59.100 -   VARP security matches vnet security.
  59.101 -
  59.102 - */
  59.103 -
  59.104  /** @file
  59.105   *
  59.106   * Kernel interface to files in /proc.
  59.107 + * todo: Add a sysfs interface using kobject.
  59.108   */
  59.109  
  59.110  #define PROC_ROOT "/proc/"
  59.111 @@ -137,6 +67,10 @@ the control interface.
  59.112  
  59.113  enum {
  59.114      VNET_POLICY = 1,
  59.115 +    VNET_VNETS,
  59.116 +    VNET_VIFS,
  59.117 +    VNET_VARP,
  59.118 +    VNET_PEERS,
  59.119  };
  59.120  
  59.121  typedef struct proc_dir_entry ProcEntry;
  59.122 @@ -144,14 +78,12 @@ typedef struct inode Inode;
  59.123  typedef struct file File;
  59.124  
  59.125  static int proc_open_fn(struct inode *inode, File *file);
  59.126 -static ssize_t proc_read_fn(File *file, char *buffer, size_t count, loff_t *offset);
  59.127 -static ssize_t proc_write_fn(File *file, const char *buffer, size_t count, loff_t *offset) ;
  59.128 +//static ssize_t proc_read_fn(File *file, char *buffer, size_t count, loff_t *offset);
  59.129 +//static ssize_t proc_write_fn(File *file, const char *buffer, size_t count, loff_t *offset) ;
  59.130  //static int proc_flush_fn(File *file);
  59.131  static loff_t proc_lseek_fn(File * file, loff_t offset, int orig);
  59.132  static int proc_ioctl_fn(struct inode *inode, File *file, unsigned opcode, unsigned long arg);
  59.133 -static int proc_release_fn(struct inode *inode, File *file);
  59.134 -
  59.135 -static int eval(Sxpr exp);
  59.136 +//static int proc_release_fn(struct inode *inode, File *file);
  59.137  
  59.138  static int ProcEntry_has_name(ProcEntry *entry, const char *name, int namelen){
  59.139      dprintf("> name=%.*s entry=%.*s\n", namelen, name, entry->namelen, entry->name);
  59.140 @@ -165,17 +97,6 @@ static int ProcEntry_has_name(ProcEntry 
  59.141  // Is release called after an error?
  59.142  //
  59.143  
  59.144 -static struct file_operations proc_file_ops = {
  59.145 -    //owner:   THIS_MODULE,
  59.146 -    open:    proc_open_fn,
  59.147 -    read:    proc_read_fn,
  59.148 -    write:   proc_write_fn,
  59.149 -    //flush:   proc_flush_fn,
  59.150 -    llseek:  proc_lseek_fn,
  59.151 -    ioctl:   proc_ioctl_fn,
  59.152 -    release: proc_release_fn,
  59.153 -};
  59.154 -
  59.155  static int proc_get_parser(File *file, Parser **val){
  59.156      int err = 0;
  59.157      Parser *parser = NULL;
  59.158 @@ -200,6 +121,7 @@ static int proc_open_fn(Inode *inode, Fi
  59.159      // Get entry from
  59.160      //ProcEntry *entry = (ProcEntry *)inode->u.generic_ip;
  59.161      //file->private_data = NULL;
  59.162 +    //file->f_dentry->d_ino is inode.
  59.163      // Check for user privilege - deny otherwise.
  59.164      // -EACCESS
  59.165      int err = 0;
  59.166 @@ -221,33 +143,13 @@ static ssize_t proc_read_fn(File *file, 
  59.167      return count;
  59.168  }
  59.169  
  59.170 +#if 0
  59.171  static ssize_t proc_write_fn(File *file, const char *buffer,
  59.172                               size_t count, loff_t *offset) {
  59.173 -    // User write.
  59.174 -    // Copy data into kernel space from buffer.
  59.175 -    // Increment offset by count, return count (or code).
  59.176 -    int err = 0;
  59.177 -    char *data = NULL;
  59.178 -    Parser *parser = NULL;
  59.179 +    return -EINVAL;
  59.180 +}
  59.181 +#endif
  59.182  
  59.183 -    //dprintf("> count=%d\n", count);
  59.184 -    err = proc_get_parser(file, &parser);
  59.185 -    if(err) goto exit;
  59.186 -    data = allocate(count);
  59.187 -    if(!data){
  59.188 -        err = -ENOMEM;
  59.189 -        goto exit;
  59.190 -    }
  59.191 -    err = copy_from_user(data, buffer, count);
  59.192 -    if(err) goto exit;
  59.193 -    *offset += count;
  59.194 -    err = Parser_input(parser, data, count);
  59.195 -  exit:
  59.196 -    deallocate(data);
  59.197 -    err = (err < 0 ? err : count);
  59.198 -    //dprintf("< err = %d\n", err);
  59.199 -    return err;
  59.200 -}
  59.201  
  59.202  #if 0
  59.203  static int proc_flush_fn(File *file){
  59.204 @@ -299,7 +201,33 @@ static int proc_ioctl_fn(Inode *inode, F
  59.205      return 0;
  59.206  }
  59.207  
  59.208 -static int proc_release_fn(Inode *inode, File *file){
  59.209 +static ssize_t proc_policy_write_fn(File *file, const char *buffer,
  59.210 +                             size_t count, loff_t *offset) {
  59.211 +    // User write: copy count bytes from the user buffer into kernel space
  59.212 +    // and feed them to the parser obtained through proc_get_parser().
  59.213 +    // Advances *offset and returns count, or a negative error code.
  59.214 +    int err = 0;
  59.215 +    char *data = NULL;
  59.216 +    Parser *parser = NULL;
  59.217 +
  59.218 +    err = proc_get_parser(file, &parser);
  59.219 +    if(err) goto exit;
  59.220 +    data = allocate(count);
  59.221 +    if(!data){
  59.222 +        err = -ENOMEM;
  59.223 +        goto exit;
  59.224 +    }
  59.225 +    // copy_from_user() returns the number of bytes it could NOT copy, never a negative code.
  59.226 +    if(copy_from_user(data, buffer, count)){ err = -EFAULT; goto exit; }
  59.227 +    *offset += count;
  59.228 +    err = Parser_input(parser, data, count);
  59.229 +  exit:
  59.230 +    deallocate(data);
  59.231 +    err = (err < 0 ? err : count);
  59.232 +    return err;
  59.233 +}
  59.234 +
  59.235 +static int proc_policy_release_fn(Inode *inode, File *file){
  59.236      // User close.
  59.237      // Cleanup file->private_data, return errcode.
  59.238      int err = 0;
  59.239 @@ -313,7 +241,7 @@ static int proc_release_fn(Inode *inode,
  59.240      if(err) goto exit;
  59.241      obj = parser->val;
  59.242      for(l = obj; CONSP(l); l = CDR(l)){
  59.243 -        err = eval(CAR(l));
  59.244 +        err = vnet_eval(CAR(l), iostdout, NULL);
  59.245          if(err) break;
  59.246      }
  59.247    exit:
  59.248 @@ -323,6 +251,130 @@ static int proc_release_fn(Inode *inode,
  59.249      return err;
  59.250  }
  59.251  
  59.252 +static int proc_io_open(Inode *inode, File *file, IOStream **val){
  59.253 +    // Create a memory stream for this open file, park it in
  59.254 +    // file->private_data and also hand it back through *val.
  59.255 +    // If mem_stream_new() fails, *val is NULL, private_data is
  59.256 +    // left untouched and -ENOMEM is returned.
  59.257 +    IOStream *stream = mem_stream_new();
  59.258 +
  59.259 +    *val = stream;
  59.260 +    if(!stream) return -ENOMEM;
  59.261 +    file->private_data = stream;
  59.262 +    return 0;
  59.263 +}
  59.264 +
  59.265 +static ssize_t proc_io_read_fn(File *file, char *buffer,
  59.266 +                               size_t count, loff_t *offset){
  59.267 +    // User read: drain the stream stored in file->private_data into
  59.268 +    // the user buffer through a 1024-byte bounce buffer. Returns the
  59.269 +    // number of bytes delivered and advances *offset by it, or
  59.270 +    // -EFAULT (offset untouched) if copy_to_user() fails.
  59.271 +    // With no stream attached it returns 0.
  59.272 +    char bounce[1024] = {};
  59.273 +    int bounce_n = sizeof(bounce);
  59.274 +    int chunk, done = 0;
  59.275 +    IOStream *stream = file->private_data;
  59.276 +
  59.277 +    dprintf(">\n");
  59.278 +    if(!stream) return done;
  59.279 +
  59.280 +    while(done < count){
  59.281 +        chunk = count - done;
  59.282 +        if(chunk > bounce_n){
  59.283 +            chunk = bounce_n;
  59.284 +        }
  59.285 +        chunk = IOStream_read(stream, bounce, chunk);
  59.286 +        if(chunk <= 0) break;
  59.287 +        if(copy_to_user(buffer + done, bounce, chunk)){
  59.288 +            return -EFAULT;
  59.289 +        }
  59.290 +        done += chunk;
  59.291 +    }
  59.292 +
  59.293 +    *offset += done;
  59.294 +    return done;
  59.295 +}
  59.296 +
  59.297 +static int proc_io_release_fn(Inode *inode, File *file){
  59.298 +    // User close: close the stream held in file->private_data, if any.
  59.299 +    // Always reports success.
  59.300 +    const int err = 0;
  59.301 +    if(file->private_data) IOStream_close((IOStream *)file->private_data);
  59.302 +    dprintf("< err=%d\n", err);
  59.303 +    return err;
  59.304 +}
  59.305 +
  59.306 +static int proc_vnets_open_fn(Inode *inode, File *file){
  59.307 +    // Open handler in proc_vnets_ops: capture vnet_print() output in a fresh
  59.308 +    // memory stream attached to the file. Returns 0 or the proc_io_open() error.
  59.309 +    IOStream *io = NULL;
  59.310 +    int err = proc_io_open(inode, file, &io);
  59.311 +    if(err) return err; // report the failure instead of claiming a successful open
  59.312 +    vnet_print(io);
  59.313 +    return 0; }
  59.314 +
  59.315 +static int proc_vifs_open_fn(Inode *inode, File *file){
  59.316 +    // Open handler in proc_vifs_ops: capture vif_print() output in a fresh
  59.317 +    // memory stream attached to the file. Returns 0 or the proc_io_open() error.
  59.318 +    IOStream *io = NULL;
  59.319 +    int err = proc_io_open(inode, file, &io);
  59.320 +    if(err) return err; // report the failure instead of claiming a successful open
  59.321 +    vif_print(io);
  59.322 +    return 0; }
  59.323 +
  59.324 +static int proc_peers_open_fn(Inode *inode, File *file){
  59.325 +    // Open handler in proc_peers_ops: capture vnet_peer_print() output in a fresh
  59.326 +    // memory stream attached to the file. Returns 0 or the proc_io_open() error.
  59.327 +    IOStream *io = NULL;
  59.328 +    int err = proc_io_open(inode, file, &io);
  59.329 +    if(err) return err; // report the failure instead of claiming a successful open
  59.330 +    vnet_peer_print(io);
  59.331 +    return 0; }
  59.332 +
  59.333 +static int proc_varp_open_fn(Inode *inode, File *file){
  59.334 +    // Open handler in proc_varp_ops: capture varp_print() output in a fresh
  59.335 +    // memory stream attached to the file. Returns 0 or the proc_io_open() error.
  59.336 +    IOStream *io = NULL;
  59.337 +    int err = proc_io_open(inode, file, &io);
  59.338 +    if(err) return err; // report the failure instead of claiming a successful open
  59.339 +    varp_print(io);
  59.340 +    return 0; }
  59.341 +
  59.342 +static struct file_operations proc_policy_ops = { // writes go to proc_policy_write_fn(); release runs proc_policy_release_fn()
  59.343 +    .open    = proc_open_fn,
  59.344 +    .read    = proc_read_fn,
  59.345 +    .write   = proc_policy_write_fn,
  59.346 +    //.flush = proc_flush_fn,
  59.347 +    .llseek  = proc_lseek_fn,
  59.348 +    .ioctl   = proc_ioctl_fn,
  59.349 +    .release = proc_policy_release_fn,
  59.350 +};
  59.351 +
  59.352 +static struct file_operations proc_vnets_ops = { // open fills a memory stream via vnet_print(); no write handler
  59.353 +    .open    = proc_vnets_open_fn,
  59.354 +    .read    = proc_io_read_fn,
  59.355 +    .release = proc_io_release_fn,
  59.356 +};
  59.357 +
  59.358 +static struct file_operations proc_vifs_ops = { // open fills a memory stream via vif_print(); no write handler
  59.359 +    .open    = proc_vifs_open_fn,
  59.360 +    .read    = proc_io_read_fn,
  59.361 +    .release = proc_io_release_fn,
  59.362 +};
  59.363 +
  59.364 +static struct file_operations proc_peers_ops = { // open fills a memory stream via vnet_peer_print(); no write handler
  59.365 +    .open    = proc_peers_open_fn,
  59.366 +    .read    = proc_io_read_fn,
  59.367 +    .release = proc_io_release_fn,
  59.368 +};
  59.369 +
  59.370 +static struct file_operations proc_varp_ops = { // open fills a memory stream via varp_print(); no write handler
  59.371 +    .open    = proc_varp_open_fn,
  59.372 +    .read    = proc_io_read_fn,
  59.373 +    .release = proc_io_release_fn,
  59.374 +};
  59.375 +
  59.376  static ProcEntry *proc_fs_root = &proc_root;
  59.377  
  59.378  static int proc_path_init(const char *path, const char **rest){
  59.379 @@ -344,7 +396,6 @@ static int proc_path_init(const char *pa
  59.380      return err;
  59.381  }
  59.382  
  59.383 -
  59.384  /** Parse a path relative to `dir'. If dir is null or the proc root
  59.385   * the path is relative to "/proc/", and the leading "/proc/" may be
  59.386   * supplied.
  59.387 @@ -379,13 +430,14 @@ static ProcEntry * ProcFS_lookup(const c
  59.388      return result;
  59.389  }
  59.390  
  59.391 -static ProcEntry *ProcFS_register(const char *name, ProcEntry *dir, int val){
  59.392 +static ProcEntry *ProcFS_register(const char *name, ProcEntry *dir,
  59.393 +                                  int val, struct file_operations *ops){
  59.394      mode_t mode = 0;
  59.395      ProcEntry *entry;
  59.396  
  59.397      entry = create_proc_entry(name, mode, dir);
  59.398      if(entry){
  59.399 -        entry->proc_fops = &proc_file_ops;
  59.400 +        entry->proc_fops = ops;
  59.401          entry->data = (void*)val; // Whatever data we need.
  59.402      }
  59.403      return entry;
  59.404 @@ -430,366 +482,22 @@ static void ProcFS_rmrec(const char *nam
  59.405      dprintf("<\n");
  59.406  }
  59.407  
  59.408 -static int stringof(Sxpr exp, char **s){
  59.409 -    int err = 0;
  59.410 -    if(ATOMP(exp)){
  59.411 -        *s = atom_name(exp);
  59.412 -    } else if(STRINGP(exp)){
  59.413 -        *s = string_string(exp);
  59.414 -    } else {
  59.415 -        err = -EINVAL;
  59.416 -        *s = NULL;
  59.417 -    }
  59.418 -    return err;
  59.419 -}
  59.420 -
  59.421 -static int child_string(Sxpr exp, Sxpr key, char **s){
  59.422 -    int err = 0;
  59.423 -    Sxpr val = sxpr_child_value(exp, key, ONONE);
  59.424 -    err = stringof(val, s);
  59.425 -    return err;
  59.426 -}
  59.427 -
  59.428 -#if 0
  59.429 -static int intof(Sxpr exp, int *v){
  59.430 -    int err = 0;
  59.431 -    char *s;
  59.432 -    unsigned long l;
  59.433 -    if(INTP(exp)){
  59.434 -        *v = OBJ_INT(exp);
  59.435 -    } else {
  59.436 -        err = stringof(exp, &s);
  59.437 -        if(err) goto exit;
  59.438 -        err = convert_atoul(s, &l);
  59.439 -        *v = (int)l;
  59.440 -    }
  59.441 - exit:
  59.442 -    return err;
  59.443 -}
  59.444 -
  59.445 -static int child_int(Sxpr exp, Sxpr key, int *v){
  59.446 -    int err = 0;
  59.447 -    Sxpr val = sxpr_child_value(exp, key, ONONE);
  59.448 -    err = intof(val, v);
  59.449 -    return err;
  59.450 -}
  59.451 -#endif
  59.452 -
  59.453 -static int vnetof(Sxpr exp, VnetId *v){
  59.454 -    int err = 0;
  59.455 -    char *s;
  59.456 -    err = stringof(exp, &s);
  59.457 -    if(err) goto exit;
  59.458 -    err = VnetId_aton(s, v);
  59.459 -  exit:
  59.460 -    return err;
  59.461 -}
  59.462 -
  59.463 -static int child_vnet(Sxpr exp, Sxpr key, VnetId *v){
  59.464 -    int err = 0;
  59.465 -    Sxpr val = sxpr_child_value(exp, key, ONONE);
  59.466 -    err = vnetof(val, v);
  59.467 -    return err;
  59.468 -}
  59.469 -
  59.470 -static int macof(Sxpr exp, unsigned char *v){
  59.471 -    int err = 0;
  59.472 -    char *s;
  59.473 -    err = stringof(exp, &s);
  59.474 -    if(err) goto exit;
  59.475 -    err = mac_aton(s, v);
  59.476 -  exit:
  59.477 -    return err;
  59.478 -}
  59.479 -
  59.480 -static int child_mac(Sxpr exp, Sxpr key, unsigned char *v){
  59.481 -    int err = 0;
  59.482 -    Sxpr val = sxpr_child_value(exp, key, ONONE);
  59.483 -    err = macof(val, v);
  59.484 -    return err;
  59.485 -}
  59.486 -
  59.487 -static int addrof(Sxpr exp, uint32_t *v){
  59.488 -    int err = 0;
  59.489 -    char *s;
  59.490 -    unsigned long w;
  59.491 -    err = stringof(exp, &s);
  59.492 -    if(err) goto exit;
  59.493 -    err = get_inet_addr(s, &w);
  59.494 -    if(err) goto exit;
  59.495 -    *v = (uint32_t)w;
  59.496 -  exit:
  59.497 -    return err;
  59.498 -}
  59.499 -
  59.500 -static int child_addr(Sxpr exp, Sxpr key, uint32_t *v){
  59.501 -    int err = 0;
  59.502 -    Sxpr val = sxpr_child_value(exp, key, ONONE);
  59.503 -    err = addrof(val, v);
  59.504 -    return err;
  59.505 -}
  59.506 -
  59.507 -/** Create a vnet.
  59.508 - * It is an error if a vnet with the same id exists.
  59.509 - *
  59.510 - * @param vnet vnet id
  59.511 - * @param device vnet device name
  59.512 - * @param security security level
  59.513 - * @return 0 on success, error code otherwise
  59.514 - */
  59.515 -static int ctrl_vnet_add(VnetId *vnet, char *device, int security){
  59.516 -    int err = 0;
  59.517 -    Vnet *vnetinfo = NULL;
  59.518 -
  59.519 -    if(strlen(device) >= IFNAMSIZ){
  59.520 -        err = -EINVAL;
  59.521 -        goto exit;
  59.522 -    }
  59.523 -    if(Vnet_lookup(vnet, &vnetinfo) == 0){
  59.524 -        err = -EEXIST;
  59.525 -        goto exit;
  59.526 -    }
  59.527 -    err = Vnet_alloc(&vnetinfo);
  59.528 -    if(err) goto exit;
  59.529 -    vnetinfo->vnet = *vnet;
  59.530 -    vnetinfo->security = security;
  59.531 -    strcpy(vnetinfo->device, device);
  59.532 -    err = Vnet_create(vnetinfo);
  59.533 -  exit:
  59.534 -    if(vnetinfo) Vnet_decref(vnetinfo);
  59.535 -    return err;
  59.536 -}
  59.537 -
  59.538 -/** Delete a vnet.
  59.539 - *
  59.540 - * @param vnet vnet id
  59.541 - * @return 0 on success, error code otherwise
  59.542 - */
  59.543 -static int ctrl_vnet_del(VnetId *vnet){
  59.544 -    int err = -ENOSYS;
  59.545 -    // Can't delete if there are any vifs on the vnet.
  59.546 -
  59.547 -    // Need to flush vif entries for the deleted vnet.
  59.548 -    // Need to flush varp entries for the deleted vnet.
  59.549 -    // Note that (un)register_netdev() hold rtnl_lock() around
  59.550 -    // (un)register_netdevice().
  59.551 -
  59.552 -    //Vnet_del(vnet);
  59.553 -    return err;
  59.554 -}
  59.555 -
  59.556 -/** Create an entry for a vif with the given vnet and vmac.
  59.557 - *
  59.558 - * @param vnet vnet id
  59.559 - * @param vmac mac address
  59.560 - * @return 0 on success, error code otherwise
  59.561 - */
  59.562 -static int ctrl_vif_add(VnetId *vnet, Vmac *vmac){
  59.563 -    int err = 0;
  59.564 -    Vnet *vnetinfo = NULL;
  59.565 -    Vif *vif = NULL;
  59.566 -
  59.567 -    dprintf(">\n");
  59.568 -    err = Vnet_lookup(vnet, &vnetinfo);
  59.569 -    if(err) goto exit;
  59.570 -    err = vif_create(vnet, vmac, &vif);
  59.571 -  exit:
  59.572 -    if(vnetinfo) Vnet_decref(vnetinfo);
  59.573 -    if(vif) vif_decref(vif);
  59.574 -    dprintf("< err=%d\n", err);
  59.575 -    return err;
  59.576 -}
  59.577 -
  59.578 -/** Delete a vif.
  59.579 - *
  59.580 - * @param vnet vnet id
  59.581 - * @param vmac mac address
  59.582 - * @return 0 on success, error code otherwise
  59.583 - */
  59.584 -static int ctrl_vif_del(VnetId *vnet, Vmac *vmac){
  59.585 -    int err = 0;
  59.586 -    Vnet *vnetinfo = NULL;
  59.587 -    Vif *vif = NULL;
  59.588 -
  59.589 -    dprintf(">\n");
  59.590 -    err = Vnet_lookup(vnet, &vnetinfo);
  59.591 -    if(err) goto exit;
  59.592 -    err = vif_lookup(vnet, vmac, &vif);
  59.593 -    if(err) goto exit;
  59.594 -    vif_remove(vnet, vmac);
  59.595 -  exit:
  59.596 -    if(vnetinfo) Vnet_decref(vnetinfo);
  59.597 -    if(vif) vif_decref(vif);
  59.598 -    dprintf("< err=%d\n", err);
  59.599 -    return err;
  59.600 -}
  59.601 -
  59.602 -/** (varp.print)
  59.603 - */
  59.604 -static int eval_varp_print(Sxpr exp){
  59.605 -    int err = 0;
  59.606 -    varp_print();
  59.607 -    return err;
  59.608 -}
  59.609 -
  59.610 -/** (varp.mcaddr (addr <addr>))
  59.611 - */
  59.612 -static int eval_varp_mcaddr(Sxpr exp){
  59.613 -    int err =0;
  59.614 -    Sxpr oaddr = intern("addr");
  59.615 -    uint32_t addr;
  59.616 -
  59.617 -    err = child_addr(exp, oaddr, &addr);
  59.618 -    if(err < 0) goto exit;
  59.619 -    varp_set_mcast_addr(addr);
  59.620 -  exit:
  59.621 -    return err;
  59.622 -}
  59.623 -
  59.624 -/** (varp.flush)
  59.625 - */
  59.626 -static int eval_varp_flush(Sxpr exp){
  59.627 -    int err = 0;
  59.628 -    varp_flush();
  59.629 -    return err;
  59.630 -}
  59.631 -
  59.632 -/** (vnet.add (id <id>)
  59.633 - *            [(vnetif <name>)]
  59.634 - *            [(security { none | auth | conf } )]
  59.635 - *  )
  59.636 - */
  59.637 -static int eval_vnet_add(Sxpr exp){
  59.638 -    int err = 0;
  59.639 -    Sxpr oid = intern("id");
  59.640 -    Sxpr osecurity = intern("security");
  59.641 -    Sxpr ovnetif = intern("vnetif");
  59.642 -    Sxpr csecurity;
  59.643 -    VnetId vnet = {};
  59.644 -    char *device = NULL;
  59.645 -    char dev[IFNAMSIZ] = {};
  59.646 -    char *security = NULL;
  59.647 -    int sec;
  59.648 -
  59.649 -    err = child_vnet(exp, oid, &vnet);
  59.650 -    if(err) goto exit;
  59.651 -    child_string(exp, ovnetif, &device);
  59.652 -    if(!device){
  59.653 -        snprintf(dev, IFNAMSIZ-1, "vnif%04x", ntohs(vnet.u.vnet16[7]));
  59.654 -        device = dev;
  59.655 -    }
  59.656 -    csecurity = sxpr_child_value(exp, osecurity, intern("none"));
  59.657 -    err = stringof(csecurity, &security);
  59.658 -    if(err) goto exit;
  59.659 -    if(strcmp(security, "none")==0){
  59.660 -        sec = 0;
  59.661 -    } else if(strcmp(security, "auth")==0){
  59.662 -        sec = SA_AUTH;
  59.663 -    } else if(strcmp(security, "conf")==0){
  59.664 -        sec = SA_CONF;
  59.665 -    } else {
  59.666 -        err = -EINVAL;
  59.667 -        goto exit;
  59.668 -    }
  59.669 -    err = ctrl_vnet_add(&vnet, device, sec);
  59.670 - exit:
  59.671 -    dprintf("< err=%d\n", err);
  59.672 -    return err;
  59.673 -}
  59.674 -
  59.675 -/** Delete a vnet.
  59.676 - *
  59.677 - * (vnet.del (id <id>))
  59.678 - *
  59.679 - * @param vnet vnet id
  59.680 - * @return 0 on success, error code otherwise
  59.681 - */
  59.682 -static int eval_vnet_del(Sxpr exp){
  59.683 -    int err = 0;
  59.684 -    Sxpr oid = intern("id");
  59.685 -    VnetId vnet = {};
  59.686 -
  59.687 -    err = child_vnet(exp, oid, &vnet);
  59.688 -    if(err) goto exit;
  59.689 -    err = ctrl_vnet_del(&vnet);
  59.690 -  exit:
  59.691 -    return err;
  59.692 -}
  59.693 -
  59.694 -/** (vif.add (vnet <vnet>) (vmac <macaddr>))
  59.695 - */
  59.696 -static int eval_vif_add(Sxpr exp){
  59.697 -    int err = 0;
  59.698 -    Sxpr ovnet = intern("vnet");
  59.699 -    Sxpr ovmac = intern("vmac");
  59.700 -    VnetId vnet = {};
  59.701 -    Vmac vmac = {};
  59.702 -
  59.703 -    err = child_vnet(exp, ovnet, &vnet);
  59.704 -    if(err) goto exit;
  59.705 -    err = child_mac(exp, ovmac, vmac.mac);
  59.706 -    if(err) goto exit;
  59.707 -    err = ctrl_vif_add(&vnet, &vmac);
  59.708 -  exit:
  59.709 -    return err;
  59.710 -}
  59.711 -
  59.712 -/** (vif.del (vnet <vnet>) (vmac <macaddr>))
  59.713 - */
  59.714 -static int eval_vif_del(Sxpr exp){
  59.715 -    int err = 0;
  59.716 -    Sxpr ovnet = intern("vnet");
  59.717 -    Sxpr ovmac = intern("vmac");
  59.718 -    VnetId vnet = {};
  59.719 -    Vmac vmac = {};
  59.720 -
  59.721 -    err = child_vnet(exp, ovnet, &vnet);
  59.722 -    if(err) goto exit;
  59.723 -    err = child_mac(exp, ovmac, vmac.mac);
  59.724 -    if(err) goto exit;
  59.725 -    err = ctrl_vif_del(&vnet, &vmac);
  59.726 -  exit:
  59.727 -    return err;
  59.728 -}
  59.729 -
  59.730 -typedef struct SxprEval {
  59.731 -    Sxpr elt;
  59.732 -    int (*fn)(Sxpr);
  59.733 -} SxprEval;
  59.734 -
  59.735 -static int eval(Sxpr exp){
  59.736 -    int err = 0;
  59.737 -    SxprEval defs[] = {
  59.738 -        { intern("varp.print"),   eval_varp_print   },
  59.739 -        { intern("varp.mcaddr"),  eval_varp_mcaddr  },
  59.740 -        { intern("varp.flush"),   eval_varp_flush   },
  59.741 -        { intern("vif.add"),      eval_vif_add      },
  59.742 -        { intern("vif.del"),      eval_vif_del      },
  59.743 -        { intern("vnet.add"),     eval_vnet_add     },
  59.744 -        { intern("vnet.del"),     eval_vnet_del     },
  59.745 -        { ONONE, NULL } };
  59.746 -    SxprEval *def;
  59.747 -
  59.748 -    iprintf("> "); objprint(iostdout, exp, 0); IOStream_print(iostdout, "\n");
  59.749 -    err = -ENOSYS;
  59.750 -    for(def = defs; !NONEP(def->elt); def++){
  59.751 -        if(sxpr_elementp(exp, def->elt)){
  59.752 -            err = def->fn(exp);
  59.753 -            break;
  59.754 -        }
  59.755 -    }
  59.756 -    iprintf("< err=%d\n", err);
  59.757 -    return err;
  59.758 -}
  59.759 -
  59.760  void __init ProcFS_init(void){
  59.761      ProcEntry *root_entry;
  59.762      ProcEntry *policy_entry;
  59.763 +    ProcEntry *vnets_entry;
  59.764 +    ProcEntry *vifs_entry;
  59.765 +    ProcEntry *peers_entry;
  59.766 +    ProcEntry *varp_entry;
  59.767  
  59.768      dprintf(">\n");
  59.769      root_entry = ProcFS_mkdir(MODULE_ROOT, NULL);
  59.770      if(!root_entry) goto exit;
  59.771 -    policy_entry = ProcFS_register("policy", root_entry, VNET_POLICY);
  59.772 +    policy_entry = ProcFS_register("policy", root_entry, VNET_POLICY, &proc_policy_ops);
  59.773 +    vnets_entry = ProcFS_register("vnets", root_entry, VNET_VNETS, &proc_vnets_ops);
  59.774 +    vifs_entry = ProcFS_register("vifs", root_entry, VNET_VIFS, &proc_vifs_ops);
  59.775 +    peers_entry = ProcFS_register("peers", root_entry, VNET_PEERS, &proc_peers_ops);
  59.776 +    varp_entry = ProcFS_register("varp", root_entry, VNET_VARP, &proc_varp_ops);
  59.777    exit:
  59.778      dprintf("<\n");
  59.779  }
    60.1 --- a/tools/vnet/vnetd/Makefile	Thu Feb 09 16:09:00 2006 +0100
    60.2 +++ b/tools/vnet/vnetd/Makefile	Thu Feb 09 16:12:11 2006 +0100
    60.3 @@ -29,6 +29,7 @@ VNETD_INSTALL_DIR = /usr/sbin
    60.4  
    60.5  INCLUDES += -I$(LIBXUTIL_DIR)
    60.6  INCLUDES += -I$(VNET_MODULE_DIR)
    60.7 +INCLUDES += -I$(shell pwd)
    60.8  
    60.9  #----------------------------------------------------------------------------
   60.10  # GC.
   60.11 @@ -37,8 +38,12 @@ INCLUDES += -I$(GC_INCLUDE)
   60.12  #LIBS += -L$(GC_LIB_DIR)
   60.13  CPPFLAGS += -D USE_GC
   60.14  
   60.15 +# Sometimes linux/atomic.h is not #ifdef __KERNEL__.
   60.16 +CPPFLAGS += -D __ARCH_I386_ATOMIC__
   60.17 +
   60.18  #----------------------------------------------------------------------------
   60.19  CFLAGS += -g
   60.20 +CFLAGS += -O2
   60.21  CFLAGS += -Wall
   60.22  CFLAGS += $(INCLUDES) $(LIBS)
   60.23  
   60.24 @@ -49,6 +54,7 @@ CFLAGS += -Wp,-MD,.$(@F).d
   60.25  PROG_DEP = .*.d
   60.26  
   60.27  vpath %.c $(LIBXUTIL_DIR)
   60.28 +vpath %.c $(VNET_MODULE_DIR)
   60.29  
   60.30  IPATHS:=$(INCLUDES:-I=)
   60.31  vpath %.h $(IPATHS)
   60.32 @@ -56,12 +62,25 @@ vpath %.h $(IPATHS)
   60.33  #----------------------------------------------------------------------------
   60.34  VNETD_SRC:=
   60.35  VNETD_SRC+= connection.c
   60.36 -VNETD_SRC+= marshal.c
   60.37  VNETD_SRC+= select.c
   60.38  VNETD_SRC+= timer.c
   60.39 -VNETD_SRC+= vcache.c
   60.40 +VNETD_SRC+= spinlock.c
   60.41 +VNETD_SRC+= skbuff.c
   60.42  VNETD_SRC+= vnetd.c
   60.43  
   60.44 +VNETD_SRC+= skb_util.c
   60.45 +VNETD_SRC+= sxpr_util.c
   60.46 +VNETD_SRC+= timer_util.c
   60.47 +VNETD_SRC+= etherip.c
   60.48 +VNETD_SRC+= vnet.c
   60.49 +VNETD_SRC+= vnet_eval.c
   60.50 +VNETD_SRC+= vnet_forward.c
   60.51 +VNETD_SRC+= vif.c
   60.52 +VNETD_SRC+= tunnel.c
   60.53 +VNETD_SRC+= sa.c
   60.54 +VNETD_SRC+= varp.c
   60.55 +
   60.56 +#----------------------------------------------------------------------------
   60.57  LIB_SRC:=
   60.58  LIB_SRC+= allocate.c
   60.59  LIB_SRC+= enum.c
   60.60 @@ -72,6 +91,7 @@ LIB_SRC+= lexis.c
   60.61  LIB_SRC+= socket_stream.c
   60.62  LIB_SRC+= string_stream.c
   60.63  LIB_SRC+= sxpr.c
   60.64 +LIB_SRC+= sxpr_parser.c
   60.65  LIB_SRC+= sys_net.c
   60.66  LIB_SRC+= sys_string.c
   60.67  LIB_SRC+= util.c
    61.1 --- a/tools/vnet/vnetd/connection.c	Thu Feb 09 16:09:00 2006 +0100
    61.2 +++ b/tools/vnet/vnetd/connection.c	Thu Feb 09 16:12:11 2006 +0100
    61.3 @@ -27,9 +27,9 @@
    61.4  #include "file_stream.h"
    61.5  #include "socket_stream.h"
    61.6  
    61.7 -#define DEBUG
    61.8 +#define MODULE_NAME "conn"
    61.9 +#define DEBUG 1
   61.10  #undef DEBUG
   61.11 -#define MODULE_NAME "conn"
   61.12  #include "debug.h"
   61.13  
   61.14  /** Initialize a file stream from a file descriptor.
   61.15 @@ -40,7 +40,7 @@
   61.16   * @param io return parameter for the stream
   61.17   * @return 0 on success, error code otherwise
   61.18   */
   61.19 -static int stream_init(int fd, const char *mode, int buffered, IOStream **io){
   61.20 +int stream_init(int fd, const char *mode, int buffered, IOStream **io){
   61.21      int err = 0;
   61.22      *io = file_stream_fdopen(fd, mode);
   61.23      if(!*io){
   61.24 @@ -65,7 +65,7 @@ static int stream_init(int fd, const cha
   61.25      return err;
   61.26  }
   61.27  
   61.28 -ConnList * ConnList_add(Conn *conn, ConnList *l){
   61.29 +ConnList * ConnList_add(ConnList *l, Conn *conn){
   61.30      ConnList *v;
   61.31      v = ALLOCATE(ConnList);
   61.32      v->conn = conn;
   61.33 @@ -73,7 +73,58 @@ ConnList * ConnList_add(Conn *conn, Conn
   61.34      return v;
   61.35  }
   61.36  
   61.37 -Conn *Conn_new(int (*fn)(Conn *), void *data){
   61.38 +/** Unlink every node holding conn from list l (nodes are not freed here); returns the new head. */
   61.39 +ConnList * ConnList_del(ConnList *l, Conn *conn){
   61.40 +    ConnList *prev = NULL, *curr;
   61.41 +    for(curr = l; curr; curr = curr->next){ // curr->next is untouched by unlinking curr.
   61.42 +        if(curr->conn != conn){
   61.43 +            prev = curr;             // Only a node that stays may become the predecessor.
   61.44 +        } else if(prev){
   61.45 +            prev->next = curr->next; // Unlink an interior node.
   61.46 +        } else {
   61.47 +            l = curr->next;          // Unlink the head.
   61.48 +        }
   61.49 +    }
   61.50 +    return l;
   61.51 +}
   61.52 +/** Call Conn_close() on every connection in the list; the list nodes are left in place. */
   61.53 +void ConnList_close(ConnList *l){
   61.54 +    for( ; l; l = l->next){
   61.55 +        Conn_close(l->conn);
   61.56 +    }
   61.57 +}
   61.58 +/** Call Conn_select() on every connection in the list, adding each socket to the select set. */
   61.59 +void ConnList_select(ConnList *l, SelectSet *set){
   61.60 +    for( ; l; l = l->next){
   61.61 +        Conn_select(l->conn, set);
   61.62 +    }
   61.63 +}
   61.64 +
   61.65 +/** Handle connections according to a select set.
   61.66 + * Each connection is passed to Conn_handle(); those returning non-zero are unlinked.
   61.67 + * @param l connection list
   61.68 + * @param set indicates ready connections
   61.69 + * @return head of the list after unlinking
   61.70 + */
   61.71 +ConnList * ConnList_handle(ConnList *l, SelectSet *set){
   61.72 +    ConnList *prev = NULL, *curr, *next;
   61.73 +    int err;
   61.74 +
   61.75 +    for(curr = l; curr; curr = next){
   61.76 +        next = curr->next;       // Read the successor before the handler runs.
   61.77 +        err = Conn_handle(curr->conn, set);
   61.78 +        if(!err){
   61.79 +            prev = curr;         // Only a node that stays may become the predecessor.
   61.80 +        } else if(prev){
   61.81 +            prev->next = next;   // Unlink an interior node.
   61.82 +        } else {
   61.83 +            l = next;            // Unlink the head.
   61.84 +        }
   61.85 +    }
   61.86 +    return l;
   61.87 +}
   61.88 +
   61.89 +Conn *Conn_new(int (*fn)(Conn *conn, int mode), void *data){
   61.90      Conn *conn;
   61.91      conn = ALLOCATE(Conn);
   61.92      conn->fn = fn;
   61.93 @@ -81,22 +132,40 @@ Conn *Conn_new(int (*fn)(Conn *), void *
   61.94      return conn;
   61.95  }
   61.96  
   61.97 -int Conn_handle(Conn *conn){
   61.98 +int Conn_handler(Conn *conn, int mode){
   61.99      int err = 0;
  61.100      dprintf(">\n");
  61.101      if(conn->fn){
  61.102 -        err = conn->fn(conn);
  61.103 +        err = conn->fn(conn, mode);
  61.104      } else {
  61.105          dprintf("> no handler\n");
  61.106          err = -ENOSYS;
  61.107      }
  61.108      if(err < 0){
  61.109 +        dprintf("> err=%d, closing %d\n", err, conn->sock);
  61.110          Conn_close(conn);
  61.111      }
  61.112      dprintf("< err=%d\n", err);
  61.113      return err;
  61.114  }
  61.115 -    
  61.116 +/** Run the connection's handler if SelectSet_in() reports a non-zero mode for its socket. */
  61.117 +int Conn_handle(Conn *conn, SelectSet *set){
  61.118 +    int err = 0; // Returned unchanged when the socket has no mode set.
  61.119 +    int mode = SelectSet_in(set, conn->sock);
  61.120 +
  61.121 +    dprintf("> sock=%d mode=%d\n", conn->sock, mode);
  61.122 +    if(mode){
  61.123 +        err = Conn_handler(conn, mode); // Result of the handler is passed back to the caller.
  61.124 +
  61.125 +    }
  61.126 +    return err;
  61.127 +}
  61.128 +/** Add the connection's socket to the select set with the connection's mode. */
  61.129 +void Conn_select(Conn *conn, SelectSet *set){
  61.130 +    dprintf("> sock=%d\n", conn->sock);
  61.131 +    SelectSet_add(set, conn->sock, conn->mode);
  61.132 +}
  61.133 +
  61.134  /** Initialize a connection.
  61.135   *
  61.136   * @param conn connection
  61.137 @@ -104,10 +173,11 @@ int Conn_handle(Conn *conn){
  61.138   * @param ipaddr ip address
  61.139   * @return 0 on success, error code otherwise
  61.140   */
  61.141 -int Conn_init(Conn *conn, int sock, int type, struct sockaddr_in addr){
  61.142 +int Conn_init(Conn *conn, int sock, int type, int mode, struct sockaddr_in addr){
  61.143      int err = 0;
  61.144      conn->addr = addr;
  61.145      conn->type = type;
  61.146 +    conn->mode = mode;
  61.147      conn->sock = sock;
  61.148      if(type == SOCK_STREAM){
  61.149          err = stream_init(sock, "r", 0, &conn->in);
  61.150 @@ -149,9 +219,12 @@ int Conn_connect(Conn *conn, int socktyp
  61.151      addr_in.sin_port = port;
  61.152      err = connect(sock, addr, addr_n);
  61.153      if(err) goto exit;
  61.154 -    err = Conn_init(conn, sock, socktype, addr_in);
  61.155 +    err = Conn_init(conn, sock, socktype, 0, addr_in);
  61.156    exit:
  61.157 -    if(err) eprintf("< err=%d\n", err);
  61.158 +    if(err){
  61.159 +        perror("Conn_connect");
  61.160 +        eprintf("< err=%d\n", err);
  61.161 +    }
  61.162      return err;
  61.163  }
  61.164  
  61.165 @@ -165,3 +238,175 @@ void Conn_close(Conn *conn){
  61.166      if(conn->out) IOStream_close(conn->out);
  61.167      shutdown(conn->sock, 2);
  61.168  }
  61.169 +
  61.170 +/** Set the SO_REUSEADDR socket option; val is passed as the option value.
  61.171 + * @return 0 on success, -errno otherwise (the failure is also reported via perror) */
  61.172 +int setsock_reuse(int sock, int val){
  61.173 +    int err = 0;
  61.174 +    err = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
  61.175 +    if(err < 0){
  61.176 +        err = -errno; // Save errno before perror() can change it.
  61.177 +        perror("setsockopt SO_REUSEADDR");
  61.178 +    }
  61.179 +    return err;
  61.180 +}
  61.181 +
  61.182 +/** Set the SO_BROADCAST socket option; val is passed as the option value.
  61.183 + * @return 0 on success, -errno otherwise (the failure is also reported via perror) */
  61.184 +int setsock_broadcast(int sock, int val){
  61.185 +    int err = 0;
  61.186 +    err = setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &val, sizeof(val));
  61.187 +    if(err < 0){
  61.188 +        err = -errno; // Save errno before perror() can change it.
  61.189 +        perror("setsockopt SO_BROADCAST");
  61.190 +    }
  61.191 +    return err;
  61.192 +}
  61.193 +
  61.194 +/** Join a socket to multicast group maddr on interface address iaddr, after setting
  61.195 + * IP_MULTICAST_LOOP to 0. Returns 0 on success, -errno otherwise (reported via perror). */
  61.196 +int setsock_multicast(int sock, uint32_t iaddr, uint32_t maddr){
  61.197 +    int err = 0;
  61.198 +    struct ip_mreqn mreq = {};
  61.199 +    int mloop = 0; // Value written to IP_MULTICAST_LOOP below.
  61.200 +    // See 'man 7 ip' for these options.
  61.201 +    mreq.imr_multiaddr.s_addr = maddr;       // IP multicast address.
  61.202 +    mreq.imr_address.s_addr   = iaddr;       // Interface IP address.
  61.203 +    mreq.imr_ifindex = 0;                    // Interface index (0 means any).
  61.204 +    err = setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &mloop, sizeof(mloop));
  61.205 +    if(err < 0){
  61.206 +        err = -errno;
  61.207 +        perror("setsockopt IP_MULTICAST_LOOP");
  61.208 +        goto exit; // The group is not joined if the loop option could not be set.
  61.209 +    }
  61.210 +    err = setsockopt(sock, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
  61.211 +    if(err < 0){
  61.212 +        err = -errno;
  61.213 +        perror("setsockopt IP_ADD_MEMBERSHIP");
  61.214 +        goto exit;
  61.215 +    }
  61.216 +  exit:
  61.217 +    return err;
  61.218 +}
  61.219 +
  61.220 +/** Set a socket's multicast ttl (default is 1).
  61.221 + * @return 0 on success, -errno otherwise (the failure is also reported via perror) */
  61.222 +int setsock_multicast_ttl(int sock, uint8_t ttl){
  61.223 +    int err = 0;
  61.224 +    err = setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); // One-byte option value.
  61.225 +    if(err < 0){
  61.226 +        err = -errno; // Save errno before perror() can change it.
  61.227 +        perror("setsockopt IP_MULTICAST_TTL");
  61.228 +    }
  61.229 +    return err;
  61.230 +}
  61.231 +/** Set the IP_PKTINFO socket option to val; returns 0 on success, -errno otherwise. */
  61.232 +int setsock_pktinfo(int sock, int val){
  61.233 +    int err = 0;
  61.234 +    err = setsockopt(sock, SOL_IP, IP_PKTINFO, &val, sizeof(val));
  61.235 +    if(err < 0){
  61.236 +        err = -errno; // Save errno before perror() can change it.
  61.237 +        perror("setsockopt IP_PKTINFO");
  61.238 +    }
  61.239 +    return err;
  61.240 +}
  61.241 +/** Format VSOCK_* flags as five characters ('-' where a flag is unset) for log output. */
  61.242 +char * socket_flags(int flags){
  61.243 +    static char s[6]; // Five flag characters plus the terminator; overwritten by each call.
  61.244 +    int i = 0;
  61.245 +    s[i++] = (flags & VSOCK_CONNECT   ? 'c' : '-');
  61.246 +    s[i++] = (flags & VSOCK_BIND      ? 'b' : '-');
  61.247 +    s[i++] = (flags & VSOCK_REUSE     ? 'r' : '-');
  61.248 +    s[i++] = (flags & VSOCK_BROADCAST ? 'B' : '-');
  61.249 +    s[i++] = (flags & VSOCK_MULTICAST ? 'M' : '-');
  61.250 +    s[i++] = '\0';
  61.251 +    return s; // Pointer to the static buffer, not a fresh allocation.
  61.252 +}
  61.253 +
  61.254 +/** Create an AF_INET socket and optionally connect and/or bind it to saddr:port.
  61.255 + * The flags can include VSOCK_REUSE, VSOCK_BROADCAST, VSOCK_CONNECT, VSOCK_BIND.
  61.256 + *
  61.257 + * @param socktype socket type
  61.258 + * @param saddr address (stored in sin_addr.s_addr as given)
  61.259 + * @param port port (stored in sin_port as given; logged through ntohs)
  61.260 + * @param flags flags
  61.261 + * @param val return value for the socket descriptor (-1 on error)
  61.262 + * @return 0 on success, error code otherwise
  61.263 + */
  61.264 +int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, int *val){
  61.265 +    int err = 0;
  61.266 +    int sock = 0;
  61.267 +    struct sockaddr_in addr_in;
  61.268 +    struct sockaddr *addr = (struct sockaddr *)&addr_in;
  61.269 +    socklen_t addr_n = sizeof(addr_in);
  61.270 +    int reuse, bcast;
  61.271 +
  61.272 +    //dprintf(">\n");
  61.273 +    reuse = (flags & VSOCK_REUSE);     // Masked flag bit, also used as the option value below.
  61.274 +    bcast = (flags & VSOCK_BROADCAST); // Masked flag bit, also used as the option value below.
  61.275 +    addr_in.sin_family      = AF_INET;
  61.276 +    addr_in.sin_addr.s_addr = saddr;
  61.277 +    addr_in.sin_port        = port;
  61.278 +    dprintf("> flags=%s addr=%s port=%d\n", socket_flags(flags),
  61.279 +            inet_ntoa(addr_in.sin_addr), ntohs(addr_in.sin_port));
  61.280 +
  61.281 +    sock = socket(AF_INET, socktype, 0);
  61.282 +    if(sock < 0){
  61.283 +        err = -errno;
  61.284 +        goto exit;
  61.285 +    }
  61.286 +    if(reuse){
  61.287 +        err = setsock_reuse(sock, reuse);
  61.288 +        if(err < 0) goto exit;
  61.289 +    }
  61.290 +    if(bcast){
  61.291 +        err = setsock_broadcast(sock, bcast);
  61.292 +        if(err < 0) goto exit;
  61.293 +    }
  61.294 +    if(flags & VSOCK_CONNECT){
  61.295 +        err = connect(sock, addr, addr_n);
  61.296 +        if(err < 0){
  61.297 +            err = -errno;
  61.298 +            perror("connect");
  61.299 +            goto exit;
  61.300 +        }
  61.301 +    }
  61.302 +    if(flags & VSOCK_BIND){
  61.303 +        err = bind(sock, addr, addr_n);
  61.304 +        if(err < 0){
  61.305 +            err = -errno;
  61.306 +            perror("bind");
  61.307 +            goto exit;
  61.308 +        }
  61.309 +    }
  61.310 +    {
  61.311 +        struct sockaddr_in self = {};
  61.312 +        socklen_t self_n = sizeof(self);
  61.313 +        getsockname(sock, (struct sockaddr *)&self, &self_n); // Result unchecked; used only for the log line.
  61.314 +        dprintf("> sockname sock=%d addr=%s port=%d reuse=%d bcast=%d\n",
  61.315 +                sock, inet_ntoa(self.sin_addr), ntohs(self.sin_port),
  61.316 +                reuse, bcast);
  61.317 +    }
  61.318 +  exit: // NOTE(review): when a step after socket() fails, sock is not closed here - looks like a descriptor leak; confirm.
  61.319 +    *val = (err ? -1 : sock);
  61.320 +    //dprintf("< err=%d\n", err);
  61.321 +    return err;
  61.322 +}
  61.323 +
  61.324 +int Conn_socket(int socktype, uint32_t saddr, uint32_t port, int flags, Conn **val){
  61.325 +    int err;
  61.326 +    int sock;
  61.327 +    struct sockaddr_in addr_in;
  61.328 +    Conn *conn;
  61.329 +
  61.330 +    err = create_socket(socktype, saddr, port, flags, &sock);
  61.331 +    if(err) goto exit;
  61.332 +    conn = Conn_new(NULL, NULL);
  61.333 +    addr_in.sin_family      = AF_INET;
  61.334 +    addr_in.sin_addr.s_addr = saddr;
  61.335 +    addr_in.sin_p