ia64/xen-unstable

changeset 3079:0dfcf477fdd3

bitkeeper revision 1.1159.183.3 (41a2188bAFjxwwkR-Q5G7XglkWtKfQ)

Add vnet files.
author mjw@wray-m-3.hpl.hp.com
date Mon Nov 22 16:49:15 2004 +0000 (2004-11-22)
parents beb02da3f69f
children 5281b60ddd27
files .rootkeys BitKeeper/etc/ignore tools/vnet/00README tools/vnet/Makefile tools/vnet/doc/vnet-module.txt tools/vnet/doc/vnet-xend.txt tools/vnet/examples/Makefile tools/vnet/examples/network-vnet tools/vnet/examples/vnet97.sxp tools/vnet/examples/vnet98.sxp tools/vnet/examples/vnet99.sxp tools/vnet/vnet-module/00README tools/vnet/vnet-module/Makefile tools/vnet/vnet-module/Makefile-2.4 tools/vnet/vnet-module/Makefile-2.6 tools/vnet/vnet-module/Makefile.ver tools/vnet/vnet-module/Makefile.vnet tools/vnet/vnet-module/esp.c tools/vnet/vnet-module/esp.h tools/vnet/vnet-module/etherip.c tools/vnet/vnet-module/etherip.h tools/vnet/vnet-module/if_etherip.h tools/vnet/vnet-module/if_varp.h tools/vnet/vnet-module/linux/pfkeyv2.h tools/vnet/vnet-module/random.c tools/vnet/vnet-module/random.h tools/vnet/vnet-module/sa.c tools/vnet/vnet-module/sa.h tools/vnet/vnet-module/sa_algorithm.c tools/vnet/vnet-module/sa_algorithm.h tools/vnet/vnet-module/skb_context.c tools/vnet/vnet-module/skb_context.h tools/vnet/vnet-module/skb_util.c tools/vnet/vnet-module/skb_util.h tools/vnet/vnet-module/tunnel.c tools/vnet/vnet-module/tunnel.h tools/vnet/vnet-module/varp.c tools/vnet/vnet-module/varp.h tools/vnet/vnet-module/varp_socket.c tools/vnet/vnet-module/vif.c tools/vnet/vnet-module/vif.h tools/vnet/vnet-module/vnet.c tools/vnet/vnet-module/vnet.h tools/vnet/vnet-module/vnet_dev.c tools/vnet/vnet-module/vnet_dev.h tools/vnet/vnet-module/vnet_ioctl.c tools/vnet/vnet-module/vnet_ioctl.h tools/vnet/vnetd/Makefile tools/vnet/vnetd/connection.c tools/vnet/vnetd/connection.h tools/vnet/vnetd/marshal.c tools/vnet/vnetd/marshal.h tools/vnet/vnetd/select.c tools/vnet/vnetd/select.h tools/vnet/vnetd/timer.c tools/vnet/vnetd/timer.h tools/vnet/vnetd/vcache.c tools/vnet/vnetd/vcache.h tools/vnet/vnetd/vnetd.c tools/vnet/vnetd/vnetd.h
line diff
     1.1 --- a/.rootkeys	Mon Nov 22 16:41:50 2004 +0000
     1.2 +++ b/.rootkeys	Mon Nov 22 16:49:15 2004 +0000
     1.3 @@ -543,6 +543,64 @@ 4104ffca-jPHLVOrW0n0VghEXXtKxg tools/sv/
     1.4  40fcefb3yMSrZvApO9ToIi-iQwnchA tools/sv/images/xen.png
     1.5  41013a83z27rKvWIxAfUBMVZ1eDCDg tools/sv/inc/script.js
     1.6  40fcefb3zGC9XNBkSwTEobCoq8YClA tools/sv/inc/style.css
     1.7 +41a21888_WlknVWjSxb32Fo13_ujsw tools/vnet/00README
     1.8 +41a21888bOiOJc7blzRbe4MNJoaYTw tools/vnet/Makefile
     1.9 +41a21888mg2k5HeiVjlQYEtJBZT4Qg tools/vnet/doc/vnet-module.txt
    1.10 +41a21888cuxfT8wjCdRR6V1lqf5NtA tools/vnet/doc/vnet-xend.txt
    1.11 +41a21888xEQJAIGktS6XQ4xz2TyA5g tools/vnet/examples/Makefile
    1.12 +41a21888FGQhPR5LJ1GRtOSIIN3QEw tools/vnet/examples/network-vnet
    1.13 +41a21888QPgKrulCfR9SY_pxZKU0KA tools/vnet/examples/vnet97.sxp
    1.14 +41a21888Gm0UBs1i7HqveT7Yz0u8DQ tools/vnet/examples/vnet98.sxp
    1.15 +41a21888r4oGPuGv2Lxl-thgV3H54w tools/vnet/examples/vnet99.sxp
    1.16 +41a21888c9TCRlUwJS9WBjB3e9aWgg tools/vnet/vnet-module/00README
    1.17 +41a21888K2ItolEkksc1MUqyTDI_Kg tools/vnet/vnet-module/Makefile
    1.18 +41a21888mJsFJD7bVMm-nrnWnalGBw tools/vnet/vnet-module/Makefile-2.4
    1.19 +41a21888Znze3-UCCBZ-Nxpj-bNeHA tools/vnet/vnet-module/Makefile-2.6
    1.20 +41a21889fwc1judJ7DYvyEviSJ3TPg tools/vnet/vnet-module/Makefile.ver
    1.21 +41a21889m_sYkdODF3j5uhMP-Guy9Q tools/vnet/vnet-module/Makefile.vnet
    1.22 +41a21889bXW2lC28U6KS_s5tOJ_W9Q tools/vnet/vnet-module/esp.c
    1.23 +41a21889L2MfLDsUFQxstt-0frIVmw tools/vnet/vnet-module/esp.h
    1.24 +41a21889V1jOsB2JExI-XQl720WHwg tools/vnet/vnet-module/etherip.c
    1.25 +41a21889IpMYbNufHMDXe2ndNw4JxA tools/vnet/vnet-module/etherip.h
    1.26 +41a21889LT9TNqO2EvTFIUTujrkX9w tools/vnet/vnet-module/if_etherip.h
    1.27 +41a21889PESythGZFG6kmSoOkkN2Nw tools/vnet/vnet-module/if_varp.h
    1.28 +41a21889nCPEomHqOyQ4vnhEm4II4g tools/vnet/vnet-module/linux/pfkeyv2.h
    1.29 +41a21889A_fw4pRmCbBfZdtRunM5Eg tools/vnet/vnet-module/random.c
    1.30 +41a218899Xy2dPKSu3pkuqaqkfKMTA tools/vnet/vnet-module/random.h
    1.31 +41a21889rIH5S1dv8ygdSsTGNlg0JA tools/vnet/vnet-module/sa.c
    1.32 +41a218896Z4vxy6gnV9h0fWRWu0lKQ tools/vnet/vnet-module/sa.h
    1.33 +41a21889qFD8BTbDpB55uVmSVDEsgw tools/vnet/vnet-module/sa_algorithm.c
    1.34 +41a21889r2AwTe-OCSSVMxBzz8uDtw tools/vnet/vnet-module/sa_algorithm.h
    1.35 +41a21889tvjtL7O8tMveVB8MdSKPnQ tools/vnet/vnet-module/skb_context.c
    1.36 +41a21889lD_QOUz2Msd7fB5rJQzfxA tools/vnet/vnet-module/skb_context.h
    1.37 +41a21889F1r1xnJamzdeuClR8MNwQg tools/vnet/vnet-module/skb_util.c
    1.38 +41a21889sS4bjVqEna24sS8NpV7SRA tools/vnet/vnet-module/skb_util.h
    1.39 +41a21889MDawEK3J_f_oAGnZznhG2w tools/vnet/vnet-module/tunnel.c
    1.40 +41a218896TlHXpVVqF50uz_u_WMXRw tools/vnet/vnet-module/tunnel.h
    1.41 +41a21889nQYbJbqrOApg_RbkwPtXGg tools/vnet/vnet-module/varp.c
    1.42 +41a21889Pev5MJlqqass6CxN4mmvPw tools/vnet/vnet-module/varp.h
    1.43 +41a21889GbsHHfkpA-PkOvltfEwpMA tools/vnet/vnet-module/varp_socket.c
    1.44 +41a21889sknn8zd5xCJlpQbs7MvxKg tools/vnet/vnet-module/vif.c
    1.45 +41a21889VsKKWpe6rcXOSLPy2FuNWQ tools/vnet/vnet-module/vif.h
    1.46 +41a21889dgkOyuSTVqy7D8TPIzrUyw tools/vnet/vnet-module/vnet.c
    1.47 +41a21889ocAdwk7V1nNt4iBpmYW-Mw tools/vnet/vnet-module/vnet.h
    1.48 +41a21889YrTiC0ArJSGFtiaHz2j1qQ tools/vnet/vnet-module/vnet_dev.c
    1.49 +41a21889rHT4vrC4VAfk7-xP_K5aBg tools/vnet/vnet-module/vnet_dev.h
    1.50 +41a21889qJj6GjT2f5hMHRvPS1AW4w tools/vnet/vnet-module/vnet_ioctl.c
    1.51 +41a2188a8W4xYB0LYm512agtoEv52g tools/vnet/vnet-module/vnet_ioctl.h
    1.52 +41a2188aFF_1T9OgpqUjjjaCqKB8lw tools/vnet/vnetd/Makefile
    1.53 +41a2188a9j84qS4CxqMLVCvyGpA93w tools/vnet/vnetd/connection.c
    1.54 +41a2188atexNEami9TNVYNkRSb7Bqg tools/vnet/vnetd/connection.h
    1.55 +41a2188abgYpITSrWoMGHHrM56nklw tools/vnet/vnetd/marshal.c
    1.56 +41a2188aUbOi5tAYwOS4aPixo1EGwQ tools/vnet/vnetd/marshal.h
    1.57 +41a2188aDJlSVB1s_st2MSWxW8kMwg tools/vnet/vnetd/select.c
    1.58 +41a2188aE9LUDdSSwNT3BWVWCvGSnQ tools/vnet/vnetd/select.h
    1.59 +41a2188aTbMKv_Eig12dSrBUEBl1Jg tools/vnet/vnetd/timer.c
    1.60 +41a2188aIzBGqQ6DUVzCxfBsN0Q6Ww tools/vnet/vnetd/timer.h
    1.61 +41a2188aIf3Xk6uvk7KzjdpOsflAEw tools/vnet/vnetd/vcache.c
    1.62 +41a2188ar6_vOO3_tEJQjmFVU3409A tools/vnet/vnetd/vcache.h
    1.63 +41a2188aETrGU60X9WtGhYVfU7z0Pw tools/vnet/vnetd/vnetd.c
    1.64 +41a2188ahYjemudGyB7078AWMFR-0w tools/vnet/vnetd/vnetd.h
    1.65  4194e861IgTabTt8HOuh143QIJFD1Q tools/x2d2/Makefile
    1.66  4194e861M2gcBz4i94cQYpqzi8n6UA tools/x2d2/cntrl_con.c
    1.67  4194e8612TrrMvC8ZlA4h2ZYCPWz4g tools/x2d2/minixend.c
     2.1 --- a/BitKeeper/etc/ignore	Mon Nov 22 16:41:50 2004 +0000
     2.2 +++ b/BitKeeper/etc/ignore	Mon Nov 22 16:49:15 2004 +0000
     2.3 @@ -59,6 +59,13 @@ tools/check/.*
     2.4  tools/libxc/xen/*
     2.5  tools/misc/miniterm/miniterm
     2.6  tools/misc/xen_cpuperf
     2.7 +tools/vnet/gc
     2.8 +tools/vnet/gc*/*
     2.9 +tools/vnet/vnet-module/.tmp_versions/*
    2.10 +tools/vnet/vnet-module/.*.cmd
    2.11 +tools/vnet/vnet-module/*.ko
    2.12 +tools/vnet/vnet-module/vnet_module.mod.*
    2.13 +tools/vnet/vnetd/vnetd
    2.14  tools/web-shutdown.tap
    2.15  tools/xentrace/xentrace
    2.16  tools/xfrd/xfrd
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/tools/vnet/00README	Mon Nov 22 16:49:15 2004 +0000
     3.3 @@ -0,0 +1,10 @@
     3.4 +This directory contains the implementation of vnets:
     3.5 +virtual private networks for virtual machines.
     3.6 +See doc/ for more information and examples/ for example
     3.7 +configurations.
     3.8 +
     3.9 +The kernel module is in vnet-module/ and the vnet forwarding
    3.10 +daemon is in vnetd/. The vnetd daemon makes vnets work across
    3.11 +subnets when multicast routing is not available.
    3.12 +
    3.13 +Mike Wray <mike.wray@hp.com>
    3.14 \ No newline at end of file
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/tools/vnet/Makefile	Mon Nov 22 16:49:15 2004 +0000
     4.3 @@ -0,0 +1,42 @@
     4.4 +# Top-level vnet makefile: builds the vnetd daemon and the vnet kernel module.
     4.5 +export LINUX_RELEASE ?=2.6
     4.6 +
     4.7 +all: compile
     4.8 +
     4.9 +compile: vnetd vnet-module
    4.10 +
    4.11 +gc.tar.gz:
    4.12 +	wget http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/$@
    4.13 +
    4.14 +gc: gc.tar.gz
    4.15 +	tar xfz gc.tar.gz
    4.16 +	ln -sf gc?.? gc
    4.17 +
    4.18 +gc-install: gc
    4.19 +	(cd gc && ./configure --prefix=`pwd`/install && make && make install)
    4.20 +
    4.21 +gc-clean:
    4.22 +	-$(MAKE) -C gc clean
    4.23 +
    4.24 +gc-pristine:
    4.25 +	-rm -rf gc?.? gc
    4.26 +# Command targets, not files (vnetd and vnet-module are directories); gc and gc.tar.gz are real files.
    4.27 +.PHONY: all compile gc-install gc-clean gc-pristine vnetd vnet-module install dist clean
    4.28 +
    4.29 +vnetd: gc-install
    4.30 +	$(MAKE) -C vnetd
    4.31 +
    4.32 +vnet-module:
    4.33 +	$(MAKE) -C vnet-module
    4.34 +
    4.35 +install: compile
    4.36 +	$(MAKE) -C vnetd install
    4.37 +	$(MAKE) -C vnet-module install
    4.38 +	$(MAKE) -C examples install
    4.39 +
    4.40 +dist: $(TARGET)
    4.41 +	$(MAKE) prefix=`pwd`/../../install dist=yes install
    4.42 +
    4.43 +clean:
    4.44 +	-$(MAKE) -C vnetd clean
    4.45 +	-$(MAKE) -C vnet-module clean
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/tools/vnet/doc/vnet-module.txt	Mon Nov 22 16:49:15 2004 +0000
     5.3 @@ -0,0 +1,50 @@
     5.4 +Vnet Module Command Interface
     5.5 +Mike Wray <mike.wray@hp.com>
     5.6 +2004/09/17
     5.7 +
     5.8 +When loaded with insmod, the vnet module creates /proc/vnet/policy,
     5.9 +which can be used to control the module by writing commands into it.
    5.10 +The return code from a command should be returned by close().
    5.11 +
    5.12 +The commands are:
    5.13 +
    5.14 +(vnet.add (id <id>) [(security { none | auth | conf } )] )
    5.15 +
    5.16 +Create the vnet with id <id> and the given security level (default none).
    5.17 +Security levels:
    5.18 +- none: no security
    5.19 +- auth: message authentication (IPSEC hmac)
    5.20 +- conf: message confidentiality (IPSEC hmac and encryption)
    5.21 +
    5.22 +(vnet.del (id <id>))
    5.23 +
    5.24 +Delete the vnet with id <id>.
    5.25 +
    5.26 +(vif.add (vnet <vnetid>) (vmac <macaddr>))
    5.27 +
    5.28 +Add the vif with MAC address <macaddr> to the vnet with id <vnetid>.
    5.29 +This makes the vnet module respond to VARP requests for <macaddr>
    5.30 +on vnet <vnetid>.
    5.31 +
    5.32 +(vif.del (vnet <vnetid>) (vmac <macaddr>))
    5.33 +
    5.34 +Remove the vif with MAC address <macaddr> from the vnet with id <vnetid>.
    5.35 +The vnet module will stop responding to VARP for the vif.
    5.36 +
    5.37 +Examples:
    5.38 +
    5.39 +To create vnet 10 with no security:
    5.40 +
    5.41 +echo '(vnet.add (id 10))' > /proc/vnet/policy
    5.42 +
    5.43 +To create vnet 11 with message authentication:
    5.44 +
    5.45 +echo '(vnet.add (id 11) (security auth))' > /proc/vnet/policy
    5.46 +
    5.47 +To add the vif with vmac "aa:00:00:bc:34:ae" to vnet 10:
    5.48 +
    5.49 +echo '(vif.add (vnet 10) (vmac aa:00:00:bc:34:ae))' > /proc/vnet/policy
    5.50 +
    5.51 +To remove the vif from the vnet:
    5.52 +
    5.53 +echo '(vif.del (vnet 10) (vmac aa:00:00:bc:34:ae))' > /proc/vnet/policy
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/tools/vnet/doc/vnet-xend.txt	Mon Nov 22 16:49:15 2004 +0000
     6.3 @@ -0,0 +1,140 @@
     6.4 +
     6.5 +Vnets: Virtual Networks for Virtual Machines
     6.6 +
     6.7 +Mike Wray <mike.wray@hp.com>
     6.8 +
     6.9 +0) Introduction
    6.10 +---------------
    6.11 +
    6.12 +Vnets provide virtual private LANs for virtual machines.
    6.13 +This is done using bridging and tunneling. A virtual interface
    6.14 +on a vnet can only see other interfaces on the same vnet - it cannot
    6.15 +see the real network, and the real network cannot see it either.
    6.16 +
    6.17 +Virtual interfaces on the same vnet can be on the same machine
    6.18 +or on different machines; either way they can still talk. The hosting
    6.19 +machines can even be on different subnets if you run vnetd to forward,
    6.20 +or have multicast routing enabled.
    6.21 +
    6.22 +
    6.23 +1) Installing vnet support
    6.24 +--------------------------
    6.25 +
    6.26 +Assuming the code has been installed (make install in the parent directory),
    6.27 +configure xend to use 'network-vnet' instead of the default 'network' to
    6.28 +start up networking. This just loads the vnet module when networking starts.
    6.29 +
    6.30 +In /etc/xen/xend-config.sxp:
    6.31 +
    6.32 +Configure the network script:
    6.33 +
    6.34 +(network-script        network-vnet)
    6.35 +
    6.36 +Restart xend.
    6.37 +
    6.38 +2) Creating vnets
    6.39 +-----------------
    6.40 +
    6.41 +Xend already implements commands to add/remove vnets and
    6.42 +bridge to them. To add a vnet use
    6.43 +
    6.44 +xm call vnet_add <vnet config file>
    6.45 +
    6.46 +For example, if vnet97.sxp contains:
    6.47 +
    6.48 +(vnet (id 97) (bridge vnet97) (vnetif vnetif97) (security none))
    6.49 +
    6.50 +do
    6.51 +
    6.52 +xm call vnet_add vnet97.sxp
    6.53 +
    6.54 +This will define a vnet with id 97 and no security. The bridge for the
    6.55 +vnet is called vnet97 and the virtual interface for it is vnetif97.
    6.56 +To add an interface on a vm to this vnet simply set its bridge to vnet97
    6.57 +in its configuration.
    6.58 +
    6.59 +In Python:
    6.60 +
    6.61 +vif="bridge=vnet97"
    6.62 +
    6.63 +In sxp:
    6.64 +
    6.65 +(dev (vif (mac aa:00:00:01:02:03) (bridge vnet97)))
    6.66 +
    6.67 +Once configured, vnets are persistent in the xend database.
    6.68 +To remove a vnet use
    6.69 +
    6.70 +xm call vnet_delete <vnet id>
    6.71 +
    6.72 +To list vnets use
    6.73 +
    6.74 +xm call vnets
    6.75 +
    6.76 +To get information on a vnet id use
    6.77 +
    6.78 +xm call vnet <vnet id>
    6.79 +
    6.80 +3) Troubleshooting
    6.81 +------------------
    6.82 +
    6.83 +The vnet module should appear in 'lsmod'.
    6.84 +If a vnet has been configured it should appear in the output of 'xm call vnets'.
    6.85 +Its bridge and interface should appear in 'ifconfig'.
    6.86 +It should also show in 'brctl show', with its attached interfaces.
    6.87 +
    6.88 +You can 'see into' a vnet from dom0 if you put an IP address on the bridge.
    6.89 +For example, if you have vnet97 with a vm with ip addr 10.0.0.12 on it,
    6.90 +then
    6.91 +
    6.92 +ifconfig vnet97 10.0.0.20 up
    6.93 +
    6.94 +should let you ping 10.0.0.12 via the vnet97 bridge.
    6.95 +
    6.96 +4) Examples
    6.97 +-----------
    6.98 +
    6.99 +Here's the full config for a vm on vnet 97, using ip addr 10.0.0.12:
   6.100 +
   6.101 +(vm
   6.102 + (name dom12)
   6.103 + (memory '64')
   6.104 + (cpu '1')
   6.105 + (console '8502')
   6.106 + (image
   6.107 +  (linux
   6.108 +   (kernel /boot/vmlinuz-2.6.9-xenU)
   6.109 +   (ip 10.0.0.12:1.2.3.4::::eth0:off)
   6.110 +   (root /dev/hda1)
   6.111 +   (args 'rw fastboot 4')
   6.112 +  )
   6.113 + )
   6.114 + (device (vbd (uname phy:hda2) (dev hda1) (mode w)))
   6.115 + (device (vif (mac aa:00:00:11:00:12) (bridge vnet97)))
   6.116 +)
   6.117 +
   6.118 +If you run another vm on the same vnet:
   6.119 +
   6.120 +(vm
   6.121 + (name dom11)
   6.122 + (memory '64')
   6.123 + (cpu '1')
   6.124 + (console '8501')
   6.125 + (image
   6.126 +  (linux
   6.127 +   (kernel /boot/vmlinuz-2.6.9-xenU)
   6.128 +   (ip 10.0.0.11:1.2.3.4::::eth0:off)
   6.129 +   (root /dev/hda1)
   6.130 +   (args 'rw fastboot 4')
   6.131 +  )
   6.132 + )
   6.133 + (device (vbd (uname phy:hda3) (dev hda1) (mode w)))
   6.134 + (device (vif (mac aa:00:00:11:00:11) (bridge vnet97)))
   6.135 +)
   6.136 +
   6.137 +the vms should be able to talk over the vnet. Check with ping.
   6.138 +If they are both on the same machine the connection will simply
   6.139 +be the vnet97 bridge; if they are on separate machines their
   6.140 +packets will be tunneled in etherip. They should be able to
   6.141 +see each other, but not the real network.
   6.142 +
   6.143 +
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/tools/vnet/examples/Makefile	Mon Nov 22 16:49:15 2004 +0000
     7.3 @@ -0,0 +1,12 @@
     7.4 +# -*- mode: Makefile; -*-
     7.5 +#============================================================================
     7.6 +# Installs the network-vnet example script into $(prefix)$(XEN_SCRIPT_DIR).
     7.7 +XEN_SCRIPT_DIR:=/etc/xen/scripts
     7.8 +
     7.9 +all:
    7.10 +
    7.11 +install:
    7.12 +	install -m 0755 -d $(prefix)$(XEN_SCRIPT_DIR)
    7.13 +	install -m 0554 network-vnet $(prefix)$(XEN_SCRIPT_DIR)
    7.14 +
    7.15 +clean:
    7.16 \ No newline at end of file
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/tools/vnet/examples/network-vnet	Mon Nov 22 16:49:15 2004 +0000
     8.3 @@ -0,0 +1,218 @@
     8.4 +#!/bin/sh
     8.5 +#============================================================================
     8.6 +# Xen network start/stop script with vnet support (loads the vnet module on start).
     8.7 +# Xend calls a network script when it starts.
     8.8 +# The script name to use is defined in /etc/xen/xend-config.sxp
     8.9 +# in the network-script field.
    8.10 +#
    8.11 +# This script creates a bridge (default xen-br0), adds a device
    8.12 +# (default eth0) to it, copies the IP addresses from the device
    8.13 +# to the bridge and adjusts the routes accordingly.
    8.14 +#
    8.15 +# If all goes well, this should ensure that networking stays up.
    8.16 +# However, some configurations are upset by this, especially
    8.17 +# NFS roots. If the bridged setup does not meet your needs,
    8.18 +# configure a different script, for example using routing instead.
    8.19 +#
    8.20 +# Usage:
    8.21 +#
    8.22 +# network (start|stop|status) {VAR=VAL}*
    8.23 +#
    8.24 +# Vars:
    8.25 +#
    8.26 +# bridge     The bridge to use (default xen-br0).
    8.27 +# netdev     The interface to add to the bridge (default eth0).
    8.28 +# antispoof  Whether to use iptables to prevent spoofing (default yes).
    8.29 +#
    8.30 +# start:
    8.31 +# Creates the bridge and enslaves netdev to it.
    8.32 +# Copies the IP addresses from netdev to the bridge.
    8.33 +# Deletes the routes to netdev and adds them on bridge.
    8.34 +#
    8.35 +# stop:
    8.36 +# Removes netdev from the bridge.
    8.37 +# Deletes the routes to bridge and adds them to netdev.
    8.38 +#
    8.39 +# status:
    8.40 +# Print ifconfig for netdev and bridge.
    8.41 +# Print routes.
    8.42 +#
    8.43 +#============================================================================
    8.44 +
    8.45 +# Exit if anything goes wrong.
    8.46 +set -e 
    8.47 +
    8.48 +# First arg is the operation.
    8.49 +OP=$1
    8.50 +shift
    8.51 +
    8.52 +# Pull variables in args in to environment.
    8.53 +for arg ; do export "${arg}" ; done
    8.54 +
    8.55 +bridge=${bridge:-xen-br0}
    8.56 +netdev=${netdev:-eth0}
    8.57 +antispoof=${antispoof:-yes}
    8.58 +
    8.59 +echo "network $OP bridge=$bridge netdev=$netdev antispoof=$antispoof"
    8.60 +
    8.61 +# Usage: transfer_addrs src dst
    8.62 +# Copy all IPv4 addresses (including aliases) from device $src to device $dst.
    8.63 +transfer_addrs () {
    8.64 +    local src=$1
    8.65 +    local dst=$2
    8.66 +    # Don't bother if $dst already has IPv4 addresses ('inet ' skips inet6 lines).
    8.67 +    if ip addr show dev ${dst} | egrep -q '^ *inet ' ; then
    8.68 +        return
    8.69 +    fi
    8.70 +    # IPv4 address lines start with 'inet ' and have the device in them.
    8.71 +    # Replace 'inet' with 'ip addr add' and change the device name $src
    8.72 +    # to 'dev $dst'. Remove netmask as we'll add routes later.
    8.73 +    ip addr show dev ${src} | egrep '^ *inet ' | sed -e "
    8.74 +s/inet/ip addr add/
    8.75 +s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\)/[0-9]\+@\1@
    8.76 +s/${src}/dev ${dst}/
    8.77 +" | sh -e
    8.78 +}
    8.79 +
    8.80 +# Usage: transfer_routes src dst
    8.81 +# Get all IP routes to device $src, delete them, and
    8.82 +# add the same routes to device $dst.
    8.83 +# The original routes have to be deleted, otherwise adding them
    8.84 +# for $dst fails (duplicate routes).
    8.85 +transfer_routes () {
    8.86 +    local src=$1
    8.87 +    local dst=$2
    8.88 +    # For each route line containing $src, sed saves it (h), prints it with
    8.89 +    # 'ip route del ' prepended (P), restores it (g), replaces $src by $dst and
    8.90 +    # prints it with 'ip route add ' prepended (P); the output is run by 'sh -e'.
    8.91 +    ip route list | grep ${src} | sed -e "
    8.92 +h
    8.93 +s/^/ip route del /
    8.94 +P
    8.95 +g
    8.96 +s/${src}/${dst}/
    8.97 +s/^/ip route add /
    8.98 +P
    8.99 +d
   8.100 +" | sh -e
   8.101 +}
   8.102 +
   8.103 +# Usage: create_bridge dev bridge
   8.104 +# Create bridge $bridge if needed and bring it up; $dev is accepted but not used here.
   8.105 +create_bridge () {
   8.106 +    local dev=$1
   8.107 +    local bridge=$2
   8.108 +
   8.109 +    # Don't create the bridge if 'brctl show' already lists it.
   8.110 +    if ! brctl show | grep -q ${bridge} ; then
   8.111 +        brctl addbr ${bridge}
   8.112 +        brctl stp ${bridge} off
   8.113 +        brctl setfd ${bridge} 0
   8.114 +    fi
   8.115 +    ifconfig ${bridge} up
   8.116 +}
   8.117 +
   8.118 +# Usage: antispoofing dev bridge
   8.119 +# Set the default policy of the iptables FORWARD chain to DROP, then
   8.120 +# accept forwarded packets matching physdev-in $dev. $bridge is not used.
   8.121 +antispoofing () {
   8.122 +    local dev=$1
   8.123 +    local bridge=$2
   8.124 +
   8.125 +    iptables -P FORWARD DROP
   8.126 +    iptables -A FORWARD -m physdev --physdev-in ${dev} -j ACCEPT
   8.127 +}
   8.128 +
   8.129 +# Usage: show_status dev bridge
   8.130 +# Print ifconfig for $dev and $bridge, then 'ip route list' and 'route -n'.
   8.131 +show_status () {
   8.132 +    local dev=$1
   8.133 +    local bridge=$2
   8.134 +    
   8.135 +    echo '============================================================'
   8.136 +    ifconfig ${dev}
   8.137 +    ifconfig ${bridge}
   8.138 +    echo ' '
   8.139 +    ip route list
   8.140 +    echo ' '
   8.141 +    route -n
   8.142 +    echo '============================================================'
   8.143 +}
   8.144 +
   8.145 +# Insert the vnet module if it can be found and
   8.146 +# it's not already there.
   8.147 +vnet_insert () {
   8.148 +    local module="vnet_module"
   8.149 +    local mod_dir=/lib/modules/$(uname -r)/kernel
   8.150 +    local mod_path="${mod_dir}/${module}"
   8.151 +    local mod_obj=""
   8.152 +    # Look for the module object with either extension (.o or .ko).
   8.153 +    for ext in ".o" ".ko" ; do
   8.154 +        f=${mod_path}${ext}
   8.155 +        if [ -f "${f}" ] ; then
   8.156 +            mod_obj=$f
   8.157 +            break
   8.158 +        fi
   8.159 +    done
   8.160 +    if [ -z "${mod_obj}" ] ; then
   8.161 +        return
   8.162 +    fi
   8.163 +    if lsmod | grep -q ${module} ; then
   8.164 +        echo "VNET: ${module} loaded"
   8.165 +    else
   8.166 +        echo "VNET: Loading ${module}..."
   8.167 +        insmod ${mod_obj}
   8.168 +    fi
   8.169 +}
   8.170 +# Start: set up the bridge for $netdev and load the vnet module; no-op if bridge=null.
   8.171 +op_start () {
   8.172 +    if [ "${bridge}" = "null" ] ; then
   8.173 +        return
   8.174 +    fi
   8.175 +    # Create the bridge and give it the interface IP addresses.
   8.176 +    # Move the interface routes onto the bridge.
   8.177 +    create_bridge ${netdev} ${bridge}
   8.178 +    transfer_addrs ${netdev} ${bridge}
   8.179 +    transfer_routes ${netdev} ${bridge}
   8.180 +    # Don't add $netdev to $bridge if it's already on a bridge.
   8.181 +    if ! brctl show | grep -q ${netdev} ; then
   8.182 +        brctl addif ${bridge} ${netdev}
   8.183 +    fi
   8.184 +    # Anti-spoofing rules are applied only when antispoof is exactly 'yes'.
   8.185 +    if [ "${antispoof}" = 'yes' ] ; then
   8.186 +        antispoofing ${netdev} ${bridge}
   8.187 +    fi
   8.188 +
   8.189 +    vnet_insert
   8.190 +}
   8.191 +# Stop: detach $netdev from the bridge and move its routes back; no-op if bridge=null.
   8.192 +op_stop () {
   8.193 +    if [ "${bridge}" = "null" ] ; then
   8.194 +        return
   8.195 +    fi
   8.196 +    # Remove the interface from the bridge.
   8.197 +    # Move the routes back to the interface.
   8.198 +    brctl delif ${bridge} ${netdev}
   8.199 +    transfer_routes ${bridge} ${netdev}
   8.200 +
   8.201 +    # It's not our place to be enabling forwarding...
   8.202 +}
   8.203 +
   8.204 +case "${OP}" in
   8.205 +    start)
   8.206 +        op_start
   8.207 +        ;;
   8.208 +
   8.209 +    stop)
   8.210 +        op_stop
   8.211 +        ;;
   8.212 +
   8.213 +    status)
   8.214 +        show_status ${netdev} ${bridge}
   8.215 +        ;;
   8.216 +
   8.217 +    *)
   8.218 +        echo 'Unknown command: ' ${OP}
   8.219 +        echo 'Valid commands are: start, stop, status'
   8.220 +        exit 1
   8.221 +esac
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/tools/vnet/examples/vnet97.sxp	Mon Nov 22 16:49:15 2004 +0000
     9.3 @@ -0,0 +1,3 @@
     9.4 +# Vnet configuration for a vnet with id 97 and no security.
     9.5 +# Configure using 'xm call vnet_add vnet97.sxp'.
     9.6 +(vnet (id 97) (bridge vnet97) (vnetif vnetif97) (security none))
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/tools/vnet/examples/vnet98.sxp	Mon Nov 22 16:49:15 2004 +0000
    10.3 @@ -0,0 +1,3 @@
    10.4 +# Vnet configuration for a vnet with id 98 and message authentication.
    10.5 +# Configure using 'xm call vnet_add vnet98.sxp'.
    10.6 +(vnet (id 98) (bridge vnet98) (vnetif vnetif98) (security auth))
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/tools/vnet/examples/vnet99.sxp	Mon Nov 22 16:49:15 2004 +0000
    11.3 @@ -0,0 +1,3 @@
    11.4 +# Vnet configuration for a vnet with id 99 and message confidentiality.
    11.5 +# Configure using 'xm call vnet_add vnet99.sxp'.
    11.6 +(vnet (id 99) (bridge vnet99) (vnetif vnetif99) (security conf))
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/tools/vnet/vnet-module/00README	Mon Nov 22 16:49:15 2004 +0000
    12.3 @@ -0,0 +1,41 @@
    12.4 +Vnet module for network virtualization.
    12.5 +Mike Wray <mike.wray@hp.com>
    12.6 +
    12.7 +*) Compiling
    12.8 +The vnet module can be compiled for 2.4 or 2.6 series kernels.
    12.9 +The makefiles use the following variables, which
   12.10 +can be set in your env or on the make command line:
   12.11 +
   12.12 +LINUX_RELEASE: linux release to compile for, 2.4 or 2.6 (default).
   12.13 +XENO_ROOT: root of the xen tree containing kernel source. Default '..'.
   12.14 +ROOT: root path to install in, default is XENO_ROOT/install.
   12.15 +      Set to '/' to install relative to filesystem root.
   12.16 +KERNEL_VERSION: kernel version, default got from XENO_ROOT.
   12.17 +KERNEL_MINOR: kernel minor version, default -xen0.
   12.18 +KERNEL_SRC: path to kernel source, default linux-<VERSION> under XENO_ROOT.
   12.19 +
   12.20 +*) For 2.4 kernel
   12.21 +
   12.22 +To compile from scratch:
   12.23 +
   12.24 +make clean
   12.25 +make LINUX_RELEASE=2.4
   12.26 +
   12.27 +This will build vnet_module.o in the current directory.
   12.28 +To install the module use
   12.29 +
   12.30 +make LINUX_RELEASE=2.4 install
   12.31 +
   12.32 +*) For 2.6 kernel
   12.33 +
   12.34 +To compile from scratch:
   12.35 +
   12.36 +make clean
   12.37 +make LINUX_RELEASE=2.6
   12.38 +
   12.39 +This will build vnet_module.ko in the current directory.
   12.40 +To install the module use
   12.41 +
   12.42 +make LINUX_RELEASE=2.6 install
   12.43 +
   12.44 +
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/tools/vnet/vnet-module/Makefile	Mon Nov 22 16:49:15 2004 +0000
    13.3 @@ -0,0 +1,67 @@
    13.4 +# -*- mode: Makefile; -*-
    13.5 +#============================================================================
    13.6 +#
    13.7 +# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    13.8 +#
    13.9 +# This program is free software; you can redistribute it and/or modify
   13.10 +# it under the terms of the GNU General Public License as published by the
   13.11 +# Free Software Foundation; either version 2 of the License, or (at your
   13.12 +# option) any later version.
   13.13 +#
   13.14 +# This program is distributed in the hope that it will be useful, but
   13.15 +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   13.16 +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   13.17 +# for more details.
   13.18 +#
   13.19 +# You should have received a copy of the GNU General Public License along
   13.20 +# with this program; if not, write to the Free software Foundation, Inc.,
   13.21 +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   13.22 +#============================================================================
   13.23 +
   13.24 +#============================================================================
   13.25 +ifeq ($(src),)
   13.26 +LINUX_RELEASE ?=2.6
   13.27 +# $(src) is empty here: include the makefile for the chosen kernel release.
   13.28 +include Makefile-$(LINUX_RELEASE)
   13.29 +
   13.30 +#============================================================================
   13.31 +else
   13.32 +#============================================================================
   13.33 +# $(src) is non-empty here: this section is for the 2.6 kbuild.
   13.34 +
   13.35 +#$(warning KBUILD_EXTMOD $(KBUILD_EXTMOD))
   13.36 +#$(warning src $(src))
   13.37 +#$(warning obj $(obj))
   13.38 +
   13.39 +include $(src)/Makefile.vnet
   13.40 +
   13.41 +obj-m = vnet_module.o
   13.42 +vnet_module-objs = $(VNET_OBJ)
   13.43 +vnet_module-objs += $(VNET_LIB_OBJ)
   13.44 +
   13.45 +#----------------------------------------------------------------------------
   13.46 +# The fancy stuff in the kernel build defeats 'vpath %.c' so we can't
   13.47 +# use that to get the lib files compiled.
   13.48 +# Setup explicit rules for them using the kbuild C compile rule.
   13.49 +
   13.50 +# File names in the lib dir (not referenced elsewhere in this section).
   13.51 +remote_srcs = $(foreach file,$(VNET_LIB_SRC),$(LIB_DIR)/$(file))
   13.52 +
   13.53 +# Equivalent file names here.
   13.54 +local_srcs = $(foreach file,$(VNET_LIB_SRC),$(src)/$(file))
   13.55 +
   13.56 +# Objects for the local names.
   13.57 +local_objs = $(local_srcs:.c=.o)
   13.58 +
   13.59 +# Build each local object from the same-named source file in $(LIB_DIR).
   13.60 +$(local_objs): $(src)/%.o: $(LIB_DIR)/%.c
   13.61 +	$(call if_changed_rule,cc_o_c)
   13.62 +#----------------------------------------------------------------------------
   13.63 +
   13.64 +vpath %.h $(LIB_DIR)
   13.65 +EXTRA_CFLAGS += -I $(LIB_DIR)
   13.66 +EXTRA_CFLAGS += -I $(src)
   13.67 +
   13.68 +endif
   13.69 +#============================================================================
   13.70 +
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/tools/vnet/vnet-module/Makefile-2.4	Mon Nov 22 16:49:15 2004 +0000
    14.3 @@ -0,0 +1,97 @@
    14.4 +# -*- mode: Makefile; -*-
    14.5 +#============================================================================
    14.6 +#
    14.7 +# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    14.8 +#
    14.9 +# This program is free software; you can redistribute it and/or modify
   14.10 +# it under the terms of the GNU General Public License as published by the
   14.11 +# Free Software Foundation; either version 2 of the License, or (at your
   14.12 +# option) any later version.
   14.13 +#
   14.14 +# This program is distributed in the hope that it will be useful, but
   14.15 +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   14.16 +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   14.17 +# for more details.
   14.18 +#
   14.19 +# You should have received a copy of the GNU General Public License along
   14.20 +# with this program; if not, write to the Free software Foundation, Inc.,
   14.21 +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   14.22 +#============================================================================
   14.23 +
   14.24 +#============================================================================
   14.25 +# Vnet module makefile for 2.4 series kernels.
   14.26 +# Compiles the vnet and libxutil objects and links them into $(KERNEL_MODULE) with $(LD) -r.
   14.27 +include Makefile.ver
   14.28 +
   14.29 +KERNEL_MODULE := vnet_module.o
   14.30 +# 1 if the kernel .config has CONFIG_MODVERSIONS=y, else 0; grep -q keeps the matched line out of the value.
   14.31 +CONFIG_MODVERSIONS := $(shell grep -q 'CONFIG_MODVERSIONS=y' $(KERNEL_SRC)/.config && echo 1 || echo 0)
   14.32 +
   14.33 +include Makefile.vnet
   14.34 +
   14.35 +VNET_OBJ += $(VNET_LIB_OBJ)
   14.36 +
   14.37 +#----------------------------------------------------------------------------
   14.38 +
   14.39 +vpath %.h $(KERNEL_SRC)/include
   14.40 +INCLUDES+= -I $(KERNEL_SRC)/include
   14.41 +
   14.42 +vpath %.h $(LIB_DIR)
   14.43 +vpath %.c $(LIB_DIR)
   14.44 +INCLUDES += -I $(LIB_DIR)
   14.45 +
   14.46 +INCLUDES+= -I .
   14.47 +
   14.48 +#----------------------------------------------------------------------------
   14.49 +
   14.50 +CPPFLAGS += -D__KERNEL__
   14.51 +CPPFLAGS += -DMODULE
   14.52 +
   14.53 +ifeq ($(CONFIG_MODVERSIONS), 1)
   14.54 +CPPFLAGS += -DMODVERSIONS
   14.55 +CPPFLAGS += -include $(KERNEL_SRC)/include/linux/modversions.h
   14.56 +endif
   14.57 +
   14.58 +CPPFLAGS += $(INCLUDES)
   14.59 +
   14.60 +CFLAGS += -Wall
   14.61 +CFLAGS += -Wstrict-prototypes
   14.62 +CFLAGS += -Wno-trigraphs
   14.63 +CFLAGS += -Wno-unused-function
   14.64 +CFLAGS += -Wno-unused-parameter 
   14.65 +
   14.66 +CFLAGS += -g
   14.67 +CFLAGS += -O2
   14.68 +CFLAGS += -fno-strict-aliasing 
   14.69 +CFLAGS += -fno-common 
   14.70 +#CFLAGS += -fomit-frame-pointer
   14.71 +
   14.72 +# Dependencies: -Wp,-MD makes gcc write a .<object>.d file per object; they are included at the end.
   14.73 +CFLAGS += -Wp,-MD,.$(@F).d
   14.74 +VNET_DEP = .*.d
   14.75 +#----------------------------------------------------------------------------
   14.76 +
   14.77 +.PHONY: all
   14.78 +all: module
   14.79 +
   14.80 +.PHONY: module modules
   14.81 +module modules: $(KERNEL_MODULE)
   14.82 +
   14.83 +$(KERNEL_MODULE): $(VNET_OBJ)
   14.84 +	$(LD) -r -o $@ $^
   14.85 +
   14.86 +.PHONY: install install-module modules_install
   14.87 +install install-module modules_install: module
   14.88 +	install -m 0755 -d $(prefix)$(KERNEL_MODULE_DIR)
   14.89 +	install -m 0554 $(KERNEL_MODULE) $(prefix)$(KERNEL_MODULE_DIR)
   14.90 +
   14.91 +TAGS:
   14.92 +	etags *.c *.h
   14.93 +
   14.94 +.PHONY: clean
   14.95 +clean:
   14.96 +	@rm -f *.a *.o *.ko *~
   14.97 +	@rm -f $(VNET_DEP) .*.cmd *.mod.?
   14.98 +	@rm -rf .tmp_versions
   14.99 +
  14.100 +-include $(VNET_DEP)
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/tools/vnet/vnet-module/Makefile-2.6	Mon Nov 22 16:49:15 2004 +0000
    15.3 @@ -0,0 +1,51 @@
    15.4 +# -*- mode: Makefile; -*-
    15.5 +#============================================================================
    15.6 +#
    15.7 +# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    15.8 +#
    15.9 +# This program is free software; you can redistribute it and/or modify
   15.10 +# it under the terms of the GNU General Public License as published by the
   15.11 +# Free Software Foundation; either version 2 of the License, or (at your
   15.12 +# option) any later version.
   15.13 +#
   15.14 +# This program is distributed in the hope that it will be useful, but
   15.15 +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   15.16 +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   15.17 +# for more details.
   15.18 +#
   15.19 +# You should have received a copy of the GNU General Public License along
   15.20 +# with this program; if not, write to the Free software Foundation, Inc.,
   15.21 +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   15.22 +#============================================================================
   15.23 +
   15.24 +#============================================================================
   15.25 +# Vnet module makefile for 2.6 series kernels.
   15.26 +# Delegates build and clean to the kernel build system in $(KERNEL_SRC), with M= set to this directory.
   15.27 +LINUX_RELEASE ?= 2.6
   15.28 +include Makefile.ver
   15.29 +
   15.30 +KERNEL_MODULE = vnet_module.ko
   15.31 +
   15.32 +#----------------------------------------------------------------------------
   15.33 +#export KBUILD_VERBOSE=1
   15.34 +
   15.35 +.PHONY: all
   15.36 +all: module
   15.37 +
   15.38 +.PHONY: module modules
   15.39 +module modules:
   15.40 +	$(MAKE) -C $(KERNEL_SRC) M=$(CURDIR) modules
   15.41 +
   15.42 +.PHONY: install install-module modules_install
   15.43 +install install-module modules_install: module
   15.44 +	install -m 0755 -d $(prefix)$(KERNEL_MODULE_DIR)
   15.45 +	install -m 0554 $(KERNEL_MODULE) $(prefix)$(KERNEL_MODULE_DIR)
   15.46 +
   15.47 +.PHONY: clean
   15.48 +clean:
   15.49 +	@$(MAKE) -C $(KERNEL_SRC) M=$(CURDIR) clean
   15.50 +	@rm -f *.a *.o *.ko *~ .*.d .*.cmd *.mod.?
   15.51 +
   15.52 +TAGS:
   15.53 +	etags *.c *.h
   15.54 +
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/tools/vnet/vnet-module/Makefile.ver	Mon Nov 22 16:49:15 2004 +0000
    16.3 @@ -0,0 +1,49 @@
    16.4 +# -*- mode: Makefile; -*-
    16.5 +#============================================================================
    16.6 +#
    16.7 +# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    16.8 +#
    16.9 +# This program is free software; you can redistribute it and/or modify
   16.10 +# it under the terms of the GNU General Public License as published by the
   16.11 +# Free Software Foundation; either version 2 of the License, or (at your
   16.12 +# option) any later version.
   16.13 +#
   16.14 +# This program is distributed in the hope that it will be useful, but
   16.15 +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   16.16 +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   16.17 +# for more details.
   16.18 +#
   16.19 +# You should have received a copy of the GNU General Public License along
   16.20 +# with this program; if not, write to the Free software Foundation, Inc.,
   16.21 +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   16.22 +#============================================================================
   16.23 +
   16.24 +#----------------------------------------------------------------------------
   16.25 +# Xen tree location and install prefix.
   16.26 +
   16.27 +# Root of xen tree.
   16.28 +XEN_ROOT ?=../../..
   16.29 +
   16.30 +# Prefix prepended to the install directory. Set to /
   16.31 +# to install relative to the filesystem root.
   16.32 +prefix ?=$(XEN_ROOT)/install/
   16.33 +#----------------------------------------------------------------------------
   16.34 +# Kernel release series and version suffix; both are defaults that the caller can override.
   16.35 +LINUX_RELEASE ?=2.6
   16.36 +KERNEL_MINOR ?=-xen0
   16.37 +# Kernel version, extracted from the linux-$(LINUX_RELEASE).*-xen-sparse directory name under $(XEN_ROOT).
   16.38 +LINUX_VERSION ?= $(shell ( /bin/ls -ld $(XEN_ROOT)/linux-$(LINUX_RELEASE).*-xen-sparse ) 2>/dev/null | \
   16.39 +                      sed -e 's!^.*linux-\(.\+\)-xen-sparse!\1!' )
   16.40 +
   16.41 +ifeq ($(LINUX_VERSION),)
   16.42 +$(error Kernel source for linux $(LINUX_RELEASE) not found)
   16.43 +endif
   16.44 +# Version of the built kernel: the sparse tree version followed by $(KERNEL_MINOR).
   16.45 +KERNEL_VERSION =$(LINUX_VERSION)$(KERNEL_MINOR)
   16.46 +
   16.47 +KERNEL_SRC ?= $(XEN_ROOT)/linux-$(KERNEL_VERSION)
   16.48 +
   16.49 +KERNEL_MODULE_DIR = /lib/modules/$(KERNEL_VERSION)/kernel
   16.50 +
   16.51 +#$(warning KERNEL_VERSION $(KERNEL_VERSION))
   16.52 +#$(warning KERNEL_SRC $(KERNEL_SRC))
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/tools/vnet/vnet-module/Makefile.vnet	Mon Nov 22 16:49:15 2004 +0000
    17.3 @@ -0,0 +1,57 @@
    17.4 +# -*- mode: Makefile; -*-
    17.5 +#============================================================================
    17.6 +#
    17.7 +# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    17.8 +#
    17.9 +# This program is free software; you can redistribute it and/or modify
   17.10 +# it under the terms of the GNU General Public License as published by the
   17.11 +# Free Software Foundation; either version 2 of the License, or (at your
   17.12 +# option) any later version.
   17.13 +#
   17.14 +# This program is distributed in the hope that it will be useful, but
   17.15 +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   17.16 +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   17.17 +# for more details.
   17.18 +#
   17.19 +# You should have received a copy of the GNU General Public License along
   17.20 +# with this program; if not, write to the Free software Foundation, Inc.,
   17.21 +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   17.22 +#============================================================================
   17.23 +
   17.24 +ifeq ($(src),)
   17.25 +SRC_DIR=
   17.26 +else
   17.27 +SRC_DIR=$(src)/
   17.28 +endif
   17.29 +# SRC_DIR is $(src)/ when $(src) is set and empty otherwise; libxutil is located relative to it.
   17.30 +LIB_DIR := $(SRC_DIR)../../libxutil
   17.31 +# Source files of the vnet module itself.
   17.32 +VNET_SRC :=
   17.33 +VNET_SRC += esp.c
   17.34 +VNET_SRC += etherip.c
   17.35 +VNET_SRC += random.c
   17.36 +VNET_SRC += sa_algorithm.c
   17.37 +VNET_SRC += sa.c
   17.38 +VNET_SRC += skb_context.c
   17.39 +VNET_SRC += skb_util.c
   17.40 +VNET_SRC += tunnel.c
   17.41 +VNET_SRC += varp.c
   17.42 +VNET_SRC += varp_socket.c
   17.43 +VNET_SRC += vif.c
   17.44 +VNET_SRC += vnet.c
   17.45 +VNET_SRC += vnet_dev.c
   17.46 +VNET_SRC += vnet_ioctl.c
   17.47 +# Library source files; the including makefiles look for these in $(LIB_DIR).
   17.48 +VNET_LIB_SRC += allocate.c
   17.49 +VNET_LIB_SRC += enum.c
   17.50 +VNET_LIB_SRC += hash_table.c
   17.51 +VNET_LIB_SRC += iostream.c
   17.52 +VNET_LIB_SRC += kernel_stream.c
   17.53 +VNET_LIB_SRC += sxpr.c
   17.54 +VNET_LIB_SRC += sxpr_parser.c
   17.55 +VNET_LIB_SRC += sys_net.c
   17.56 +VNET_LIB_SRC += sys_string.c
   17.57 +# Object file names corresponding to the two source lists.
   17.58 +VNET_OBJ := $(VNET_SRC:.c=.o)
   17.59 +VNET_LIB_OBJ := $(VNET_LIB_SRC:.c=.o)
   17.60 +
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/tools/vnet/vnet-module/esp.c	Mon Nov 22 16:49:15 2004 +0000
    18.3 @@ -0,0 +1,863 @@
    18.4 +/*
    18.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    18.6 + *
    18.7 + * This program is free software; you can redistribute it and/or modify
    18.8 + * it under the terms of the GNU General Public License as published by the 
    18.9 + * Free Software Foundation; either version 2 of the License, or (at your
   18.10 + * option) any later version.
   18.11 + * 
   18.12 + * This program is distributed in the hope that it will be useful, but
   18.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   18.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   18.15 + * for more details.
   18.16 + *
   18.17 + * You should have received a copy of the GNU General Public License along
   18.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   18.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   18.20 + *
   18.21 + */
   18.22 +#include <linux/config.h>
   18.23 +#include <linux/module.h>
   18.24 +#include <linux/types.h>
   18.25 +#include <linux/sched.h>
   18.26 +#include <linux/kernel.h>
   18.27 +#include <asm/uaccess.h>
   18.28 +
   18.29 +#include <linux/init.h>
   18.30 +
   18.31 +#include <linux/version.h>
   18.32 +
   18.33 +#include <linux/skbuff.h>
   18.34 +#include <linux/netdevice.h>
   18.35 +#include <linux/net.h>
   18.36 +#include <linux/in.h>
   18.37 +#include <linux/inet.h>
   18.38 +
   18.39 +#include <net/ip.h>
   18.40 +#include <net/protocol.h>
   18.41 +#include <net/route.h>
   18.42 +
   18.43 +#include <linux/if_ether.h>
   18.44 +#include <linux/icmp.h>
   18.45 +
   18.46 +#include <asm/scatterlist.h>
   18.47 +#include <linux/crypto.h>
   18.48 +#include <linux/pfkeyv2.h>
   18.49 +#include <linux/random.h>
   18.50 +
   18.51 +#include <esp.h>
   18.52 +#include <sa.h>
   18.53 +#include <sa_algorithm.h>
   18.54 +#include <tunnel.h>
   18.55 +#include <vnet.h>
   18.56 +#include <skb_util.h>
   18.57 +
   18.58 +static const int DEBUG_ICV = 0;
   18.59 +/* DEBUG is defined and then immediately undefined, so it is not defined when debug.h is included. */
   18.60 +#define MODULE_NAME "IPSEC"
   18.61 +#define DEBUG 1
   18.62 +#undef DEBUG
   18.63 +#include "debug.h"
   18.64 +
   18.65 +/* Outgoing packet:                            [ eth | ip | data ]
   18.66 + * After etherip:        [ eth2 | ip2 |  ethip | eth | ip | data ]
   18.67 + * After esp   :   [ eth2 | ip2 | esp | {ethip | eth | ip | data} | pad | icv ]
   18.68 + *                                                        ^     +
   18.69 + * The curly braces { ... } denote encryption.
   18.70 + * The esp header includes the fixed esp headers and the iv (variable size).
   18.71 + * The point marked ^ does not move. To the left is in the header, to the right
   18.72 + * is in the frag. Remember that all outgoing skbs (from domains) have 1 frag.
   18.73 + * Data after + is added by esp, using an extra frag.
   18.74 + *
   18.75 + * Incoming as above.
   18.76 + * After decrypt:  [ eth2 | ip2 | esp |  ethip | eth | ip | data  | pad | icv ]
   18.77 + * Trim tail:      [ eth2 | ip2 | esp |  ethip | eth | ip | data ]
   18.78 + * Drop hdr:             [ eth2 | ip2 |  ethip | eth | ip | data ]
   18.79 + *                                    ^
   18.80 + * The point marked ^ does not move. Incoming skbs are linear (no frags).
   18.81 + * The tail is trimmed by adjusting skb->tail and len.
   18.82 + * The esp hdr is dropped by using memmove to move the headers and
   18.83 + * adjusting the skb pointers.
   18.84 + *
   18.85 + * todo: Now this code is in linux we can't assume 1 frag for outbound skbs,
   18.86 + * or (maybe) that memmove is safe on inbound.
   18.87 + */
   18.88 +
   18.89 +/** Round n up to a multiple of block.
   18.90 + * If block is less than 2 returns n unchanged.
   18.91 + * Otherwise assumes block is a power of 2 (block - 1 is used as a bit mask).
   18.92 + *
   18.93 + * @param n value to round up
   18.94 + * @param block size to round to a multiple of
   18.95 + * @return rounded value
   18.96 + */
   18.97 +static inline int roundup(int n, int block){
   18.98 +    if(block <= 1) return n;
   18.99 +    block--;
  18.100 +    return (n + block) & ~block;
  18.101 +}
  18.102 +
  18.103 +/** Check if n is a multiple of block.
  18.104 + * If block is less than 2 returns 1.
  18.105 + * Otherwise assumes block is a power of 2 (block - 1 is used as a bit mask).
  18.106 + *
  18.107 + * @param n value to check
  18.108 + * @param block block size
  18.109 + * @return 1 if a multiple, 0 otherwise
  18.110 + */
  18.111 +static inline int multipleof(int n, int block){
  18.112 +    if(block <= 1) return 1;
  18.113 +    block--;
  18.114 +    return !(n & block);
  18.115 +}
  18.116 +
  18.117 +/** Convert from bits to bytes.
  18.118 + *
  18.119 + * @param n number of bits
  18.120 + * @return number of whole bytes (n / 8 using integer division)
  18.121 + */
  18.122 +static inline int bits_to_bytes(int n){
  18.123 +    return n / 8;
  18.124 +}
  18.125 +
  18.126 +
  18.127 +/** Insert esp padding at the end of an skb.
  18.128 + * Writes padding bytes 1..pad_n, then the number of padding bytes, then the protocol number.
  18.129 + *
  18.130 + * @param skb skb
  18.131 + * @param offset offset from skb end to where padding should end
  18.132 + * @param extra_n total number of bytes written (pad_n = extra_n - ESP_PAD_N padding bytes plus the pad fields)
  18.133 + * @param protocol protocol number (from original ip hdr)
  18.134 + * @return 0 on success, error code otherwise (the result of skb_put_bits())
  18.135 + */
  18.136 +static int esp_sa_pad(struct sk_buff *skb, int offset, int extra_n,
  18.137 +                      unsigned char protocol){
  18.138 +    int err;
  18.139 +    char *data;
  18.140 +    int pad_n = extra_n - ESP_PAD_N;
  18.141 +    int i;
  18.142 +    char buf[extra_n];
  18.143 +
  18.144 +    data = buf;
  18.145 +    for(i = 1; i <= pad_n; i++){
  18.146 +        *data++ = i;
  18.147 +    }
  18.148 +    *data++ = pad_n;
  18.149 +    *data++ = protocol;
  18.150 +    err = skb_put_bits(skb, skb->len - offset - extra_n, buf, extra_n);
  18.151 +    return err;
  18.152 +}
  18.153 +
  18.154 +/** Encrypt skb. Skips esp header and iv.
  18.155 + * Assumes skb->data points at esp header.
  18.156 + * When iv_n is non-zero the iv passed to the cipher is also copied into esph->data.
  18.157 + * @param esp esp state
  18.158 + * @param esph esp header
  18.159 + * @param skb packet
  18.160 + * @param head_n size of esp header and iv
  18.161 + * @param iv_n size of iv
  18.162 + * @param text_n size of ciphertext
  18.163 + * @return 0 on success, error code otherwise (the result of skb_scatterlist())
  18.164 + */
  18.165 +static int esp_sa_encrypt(ESPState *esp, ESPHdr *esph, struct sk_buff *skb,
  18.166 +                   int head_n, int iv_n, int text_n){
  18.167 +    int err = 0;
  18.168 +    int sg_n = skb_shinfo(skb)->nr_frags + 1;
  18.169 +    struct scatterlist sg[sg_n];
  18.170 +
  18.171 +    err = skb_scatterlist(skb, sg, &sg_n, head_n, text_n);
  18.172 +    if(err) goto exit;
  18.173 +    if(iv_n){
  18.174 +        crypto_cipher_set_iv(esp->cipher.tfm, esp->cipher.iv, iv_n);
  18.175 +    }
  18.176 +    crypto_cipher_encrypt(esp->cipher.tfm, sg, sg, text_n);
  18.177 +    if(iv_n){
  18.178 +        memcpy(esph->data, esp->cipher.iv, iv_n);
  18.179 +        crypto_cipher_get_iv(esp->cipher.tfm, esp->cipher.iv, iv_n);
  18.180 +    }
  18.181 +  exit:
  18.182 +    return err;
  18.183 +}
  18.184 +
  18.185 +/** Decrypt skb. Skips esp header and iv.
  18.186 + * Assumes skb->data points at esp header.
  18.187 + * When iv_n is non-zero the iv is taken from esph->data.
  18.188 + * @param esp esp state
  18.189 + * @param esph esp header
  18.190 + * @param skb packet
  18.191 + * @param head_n size of esp header and iv
  18.192 + * @param iv_n size of iv
  18.193 + * @param text_n size of ciphertext
  18.194 + * @return 0 on success, error code otherwise (the result of skb_scatterlist())
  18.195 + */
  18.196 +static int esp_sa_decrypt(ESPState *esp, ESPHdr *esph, struct sk_buff *skb,
  18.197 +                   int head_n, int iv_n, int text_n){
  18.198 +    int err = 0;
  18.199 +    int sg_n = skb_shinfo(skb)->nr_frags + 1;
  18.200 +    struct scatterlist sg[sg_n];
  18.201 +
  18.202 +    err = skb_scatterlist(skb, sg, &sg_n, head_n, text_n);
  18.203 +    if(err) goto exit;
  18.204 +    if(iv_n){
  18.205 +        crypto_cipher_set_iv(esp->cipher.tfm, esph->data, iv_n);
  18.206 +    }
  18.207 +    crypto_cipher_decrypt(esp->cipher.tfm, sg, sg, text_n);
  18.208 +  exit:
  18.209 +    return err;
  18.210 +}
  18.211 +
  18.212 +/** Compute icv. Includes esp header, iv and ciphertext.
  18.213 + * Assumes skb->data points at esp header.
  18.214 + * The computed icv is written into the skb at offset digest_n.
  18.215 + * @param esp esp state
  18.216 + * @param skb packet
  18.217 + * @param digest_n number of bytes to digest
  18.218 + * @param icv_n size of icv
  18.219 + * @return 0 on success, error code otherwise (the result of writing the icv with skb_put_bits())
  18.220 + */
  18.221 +static int esp_sa_digest(ESPState *esp, struct sk_buff *skb, int digest_n, int icv_n){
  18.222 +    int err = 0;
  18.223 +    u8 icv[icv_n];
  18.224 +    
  18.225 +    if(DEBUG_ICV){
  18.226 +        dprintf("> skb digest_n=%d icv_n=%d\n", digest_n, icv_n);
  18.227 +        skb_print_bits(skb, 0, digest_n);
  18.228 +    }
  18.229 +    memset(icv, 0, icv_n);
  18.230 +    esp->digest.icv(esp, skb, 0, digest_n, icv);
  18.231 +    err = skb_put_bits(skb, digest_n, icv, icv_n);
  18.232 +    return err;
  18.233 +}
  18.234 +
  18.235 +/** Check the icv and trim it from the skb tail.
  18.236 + * The icv is recomputed over the first skb->len - icv_n bytes and compared with the one in the skb.
  18.237 + * @param sa sa state
  18.238 + * @param esp esp state
  18.239 + * @param esph esp header
  18.240 + * @param skb packet
  18.241 + * @return 0 on success, error code otherwise (-EINVAL if the icv does not match)
  18.242 + */
  18.243 +static int esp_check_icv(SAState *sa, ESPState *esp, ESPHdr *esph, struct sk_buff *skb){
  18.244 +    int err = 0;
  18.245 +    int icv_n = esp->digest.icv_n;
  18.246 +    int digest_n = skb->len - icv_n;
  18.247 +    u8 icv_skb[icv_n];
  18.248 +    u8 icv_new[icv_n];
  18.249 +
  18.250 +    dprintf(">\n");
  18.251 +    if(DEBUG_ICV){
  18.252 +        dprintf("> skb len=%d digest_n=%d icv_n=%d\n",
  18.253 +                skb->len, digest_n, icv_n);
  18.254 +        skb_print_bits(skb, 0, skb->len);
  18.255 +    }
  18.256 +    if((err = skb_copy_bits(skb, digest_n, icv_skb, icv_n))){ // Keep the error: an unread icv must not pass.
  18.257 +        wprintf("> Error getting icv from skb\n");
  18.258 +        goto exit;
  18.259 +    }
  18.260 +    esp->digest.icv(esp, skb, 0, digest_n, icv_new);
  18.261 +    if(DEBUG_ICV){
  18.262 +        dprintf("> len=%d icv_n=%d", digest_n, icv_n);
  18.263 +        printk("\nskb="); buf_print(icv_skb, icv_n);
  18.264 +        printk("new="); buf_print(icv_new, icv_n);
  18.265 +    }
  18.266 +    if(unlikely(memcmp(icv_new, icv_skb, icv_n))){
  18.267 +        wprintf("> ICV check failed!\n");
  18.268 +        err = -EINVAL;
  18.269 +        sa->counts.integrity_failures++;
  18.270 +        goto exit;
  18.271 +    }
  18.272 +    skb_trim_tail(skb, icv_n);
  18.273 +  exit:
  18.274 +    dprintf("< err=%d\n", err);
  18.275 +    return err;
  18.276 +}
  18.277 +
  18.278 +/** Send a packet via an ESP SA.
  18.279 + * Inserts the esp header and padding, encrypts if SA_CONF is set, adds the icv if any, then calls Tunnel_send().
  18.280 + * @param sa SA state
  18.281 + * @param skb packet to send
  18.282 + * @param tunnel underlying tunnel
  18.283 + * @return 0 on success, negative error code otherwise
  18.284 + */
  18.285 +static int esp_sa_send(SAState *sa, struct sk_buff *skb, Tunnel *tunnel){
  18.286 +    int err = 0;
  18.287 +    int ip_n;           // Size of ip header.
  18.288 +    int plaintext_n;	// Size of plaintext.
  18.289 +    int ciphertext_n;   // Size of ciphertext (including padding).
  18.290 +    int extra_n;        // Extra bytes needed for ciphertext.
  18.291 +    int icv_n = 0;      // Size of integrity check value (icv).
  18.292 +    int iv_n = 0;       // Size of initialization vector (iv).
  18.293 +    int head_n;         // Size of esp header and iv.
  18.294 +    int tail_n;         // Size of esp trailer: padding and icv.
  18.295 +    ESPState  *esp;
  18.296 +    ESPHdr *esph;
  18.297 +
  18.298 +    dprintf(">\n");
  18.299 +    esp = sa->data;
  18.300 +    ip_n = (skb->nh.iph->ihl << 2);
  18.301 +    // Assuming skb->data points at ethernet header, exclude ethernet
  18.302 +    // header and IP header.
  18.303 +    plaintext_n = skb->len - ETH_HLEN - ip_n;
  18.304 +    // Add size of padding fields.
  18.305 +    ciphertext_n = roundup(plaintext_n + ESP_PAD_N, esp->cipher.block_n);
  18.306 +    if(esp->cipher.pad_n > 0){
  18.307 +        ciphertext_n = roundup(ciphertext_n, esp->cipher.pad_n);
  18.308 +    }
  18.309 +    extra_n = ciphertext_n - plaintext_n;
  18.310 +    iv_n = esp->cipher.iv_n;
  18.311 +    icv_n = esp->digest.icv_n;
  18.312 +    dprintf("> len=%d plaintext=%d ciphertext=%d extra=%d\n",
  18.313 +            skb->len, plaintext_n, ciphertext_n, extra_n);
  18.314 +    dprintf("> iv=%d icv=%d\n", iv_n, icv_n);
  18.315 +    skb_print_bits(skb, 0, skb->len);
  18.316 +
  18.317 +    // Add headroom for esp header and iv, tailroom for the ciphertext
  18.318 +    // and icv.
  18.319 +    head_n = ESP_HDR_N + iv_n;
  18.320 +    tail_n = extra_n + icv_n;
  18.321 +    err = skb_make_room(&skb, skb, head_n, tail_n);
  18.322 +    if(err) goto exit;
  18.323 +    dprintf("> skb=%p\n", skb);
  18.324 +    // Move the headers up to make space for the esp header.  We can
  18.325 +    // use memmove() since all this data fits in the skb head.
  18.326 +    // todo: Can't assume this anymore?
  18.327 +    dprintf("> header push...\n");
  18.328 +    __skb_push(skb, head_n);
  18.329 +    if(0 && skb->mac.raw){
  18.330 +        dprintf("> skb->mac=%p\n", skb->mac.raw);
  18.331 +        dprintf("> ETH header pull...\n");
  18.332 +        memmove(skb->data, skb->mac.raw, ETH_HLEN);
  18.333 +        skb->mac.raw = skb->data; 
  18.334 +        __skb_pull(skb, ETH_HLEN);
  18.335 +    }
  18.336 +    dprintf("> IP header pull...\n");
  18.337 +    memmove(skb->data, skb->nh.raw, ip_n);
  18.338 +    skb->nh.raw = skb->data;
  18.339 +    __skb_pull(skb, ip_n);
  18.340 +    esph = (void*)skb->data;
  18.341 +    // Add spi and sequence number.
  18.342 +    esph->spi = sa->ident.spi;
  18.343 +    esph->seq = htonl(++sa->replay.send_seq);
  18.344 +    // Insert the padding bytes (extra bytes less the pad fields); do not send if this fails.
  18.345 +    dprintf("> esp_sa_pad ...\n");
  18.346 +    err = esp_sa_pad(skb, icv_n, extra_n, skb->nh.iph->protocol);
  18.347 +    if(err) goto exit;
  18.348 +    if(sa->security & SA_CONF){
  18.349 +        dprintf("> esp_sa_encrypt...\n");
  18.350 +        err = esp_sa_encrypt(esp, esph, skb, head_n, iv_n, ciphertext_n);
  18.351 +        if(err) goto exit;
  18.352 +    }
  18.353 +    if(icv_n){
  18.354 +        dprintf("> esp_sa_digest...\n");
  18.355 +        err = esp_sa_digest(esp, skb, head_n + ciphertext_n, icv_n);
  18.356 +        if(err) goto exit;
  18.357 +    }
  18.358 +    dprintf("> IP header push...\n");
  18.359 +    __skb_push(skb, ip_n);
  18.360 +    if(0 && skb->mac.raw){
  18.361 +        dprintf("> ETH header push...\n");
  18.362 +        __skb_push(skb, ETH_HLEN);
  18.363 +    }
  18.364 +    // Fix ip header. Adjust length field, set protocol, zero
  18.365 +    // checksum.
  18.366 +    {
  18.367 +        // Total packet length (bytes).
  18.368 +        int tot_len = ntohs(skb->nh.iph->tot_len);
  18.369 +        tot_len += head_n;
  18.370 +        tot_len += tail_n;
  18.371 +        skb->nh.iph->protocol = IPPROTO_ESP;
  18.372 +        skb->nh.iph->tot_len  = htons(tot_len);
  18.373 +        skb->nh.iph->check    = 0;
  18.374 +    }
  18.375 +    err = Tunnel_send(tunnel, skb);
  18.376 +  exit:
  18.377 +    dprintf("< err=%d\n", err);
  18.378 +    return err;
  18.379 +}
  18.380 +
  18.381 +/** Release an skb context.
  18.382 + * Clears context->data and drops the refcount on the SA it referred to.
  18.383 + * Does nothing if the context or its SA is null.
  18.384 + * @param context to free
  18.385 + */
  18.386 +static void esp_context_free_fn(SkbContext *context){
  18.387 +    SAState *sa;
  18.388 +    if(!context) return;
  18.389 +    sa = context->data;
  18.390 +    if(!sa) return;
  18.391 +    context->data = NULL;
  18.392 +    SAState_decref(sa);
  18.393 +}   
  18.394 +
  18.395 +/** Receive a packet via an ESP SA.
  18.396 + * Does ESP receive processing (check icv, decrypt), strips
  18.397 + * ESP header and re-receives.
  18.398 + *
  18.399 + * @param sa SA
  18.400 + * @param skb packet
  18.401 + * @return 1 once the size and icv checks have passed (mine is set), otherwise the error code from the failed check
  18.402 + */
  18.403 +static int esp_sa_recv(SAState *sa, struct sk_buff *skb){
  18.404 +    int err = -EINVAL;
  18.405 +    int mine = 0;
  18.406 +    int vnet = 0; //todo: fixme - need to record skb vnet somewhere
  18.407 +    ESPState *esp;
  18.408 +    ESPHdr *esph;
  18.409 +    ESPPadding *pad;
  18.410 +    int block_n;	// Cipher blocksize.
  18.411 +    int icv_n;          // Size of integrity check value (icv).
  18.412 +    int iv_n;           // Size of initialization vector (iv).
  18.413 +    int text_n;         // Size of text (ciphertext or plaintext).
  18.414 +    int head_n;         // Size of esp header and iv.
  18.415 +
  18.416 +    dprintf("> skb=%p\n", skb);
  18.417 +    // Assumes skb->data points at esp hdr.
  18.418 +    esph = (void*)skb->data;
  18.419 +    esp = sa->data;
  18.420 +    block_n = crypto_tfm_alg_blocksize(esp->cipher.tfm);
  18.421 +    icv_n = esp->digest.icv_n;
  18.422 +    iv_n = esp->cipher.iv_n;
  18.423 +    head_n = ESP_HDR_N + iv_n;
  18.424 +    text_n = skb->len - head_n - icv_n;
  18.425 +    if(text_n < ESP_PAD_N || !multipleof(text_n, block_n)){
  18.426 +        wprintf("> Invalid size: text_n=%d tfm:block_n=%d esp:block_n=%d\n",
  18.427 +                text_n, block_n, esp->cipher.block_n);
  18.428 +        goto exit;
  18.429 +    }
  18.430 +    if(icv_n){
  18.431 +        err = esp_check_icv(sa, esp, esph, skb);
  18.432 +        if(err) goto exit;
  18.433 +    }
  18.434 +    mine = 1;
  18.435 +    if(sa->security & SA_CONF){
  18.436 +        err = esp_sa_decrypt(esp, esph, skb, head_n, iv_n, text_n);
  18.437 +        if(err) goto exit;
  18.438 +    }
  18.439 +    // Strip esp header by moving the other headers down.
  18.440 +    //todo Maybe not safe to do this anymore.
  18.441 +    memmove(skb->mac.raw + head_n, skb->mac.raw, (skb->data - skb->mac.raw));
  18.442 +    skb->mac.raw += head_n;
  18.443 +    skb->nh.raw  += head_n;
  18.444 +    // Move skb->data back to ethernet header.
  18.445 +    // Do in 2 moves to ensure offsets are +ve,
  18.446 +    // since args to skb_pull/skb_push are unsigned.
  18.447 +    __skb_pull(skb, head_n);
  18.448 +    __skb_push(skb, skb->data - skb->mac.raw);
  18.449 +    // After this esph is invalid.
  18.450 +    esph = NULL;
  18.451 +    // Trim padding, restore protocol in IP header.
  18.452 +    pad = skb_trim_tail(skb, ESP_PAD_N);
  18.453 +    text_n -= ESP_PAD_N;
  18.454 +    if((pad->pad_n > 255) | (pad->pad_n > text_n)){
  18.455 +        wprintf("> Invalid padding: pad_n=%d text_n=%d\n", pad->pad_n, text_n);
  18.456 +        goto exit;
  18.457 +    }
  18.458 +    skb_trim_tail(skb, pad->pad_n);
  18.459 +    skb->nh.iph->protocol = pad->protocol;
  18.460 +    err = skb_push_context(skb, vnet, sa->ident.addr, IPPROTO_ESP,
  18.461 +                           sa, esp_context_free_fn);
  18.462 +    if(err) goto exit;
  18.463 +    // Increase sa refcount now the skb context refers to it.
  18.464 +    SAState_incref(sa);
  18.465 +    err = netif_rx(skb);
  18.466 +  exit:
  18.467 +    if(mine) err = 1;
  18.468 +    dprintf("< skb=%p err=%d\n", skb, err);
  18.469 +    return err;
  18.470 +}
  18.471 +
  18.472 +/** Estimate the packet size for some data using ESP processing.
  18.473 + * The iv size is not included in the estimate.
  18.474 + * @param sa ESP SA
  18.475 + * @param data_n data size
  18.476 + * @return size after ESP processing: padded data plus icv and ESP header
  18.477 + */
  18.478 +static u32 esp_sa_size(SAState *sa, int data_n){
  18.479 +    // Even in transport mode have to round up to blocksize.
  18.480 +    // Have to add some padding for alignment even if pad_n is zero.
  18.481 +    ESPState *esp = sa->data;
  18.482 +    
  18.483 +    data_n = roundup(data_n + ESP_PAD_N, esp->cipher.block_n);
  18.484 +    if(esp->cipher.pad_n > 0){
  18.485 +        data_n = roundup(data_n, esp->cipher.pad_n);
  18.486 +    }
  18.487 +    data_n += esp->digest.icv_n;
  18.488 +    //data_n += esp->cipher.iv_n;
  18.489 +    data_n += ESP_HDR_N;
  18.490 +    return data_n;
  18.491 +}
  18.492 +
  18.493 +/** Compute an icv using HMAC digest.
  18.494 + * The digest is produced in esp->digest.icv_tmp and esp->digest.icv_n bytes of it are copied to icv.
  18.495 + * @param esp ESP state
  18.496 + * @param skb packet to digest
  18.497 + * @param offset offset to start at
  18.498 + * @param len number of bytes to digest
  18.499 + * @param icv return parameter for ICV
  18.500 + * Returns nothing: the result of skb_scatterlist() is stored in err but never checked.
  18.501 + */
  18.502 +static inline void esp_hmac_digest(ESPState *esp, struct sk_buff *skb,
  18.503 +                                   int offset, int len, u8 *icv){
  18.504 +    int err = 0;
  18.505 +    struct crypto_tfm *digest = esp->digest.tfm;
  18.506 +    char *icv_tmp = esp->digest.icv_tmp;
  18.507 +    int sg_n = skb_shinfo(skb)->nr_frags + 1;
  18.508 +    struct scatterlist sg[sg_n];
  18.509 +
  18.510 +    dprintf("> offset=%d len=%d\n", offset, len);
  18.511 +    memset(icv, 0, esp->digest.icv_n);
  18.512 +    if(DEBUG_ICV){
  18.513 +        dprintf("> key len=%d\n", esp->digest.key_n);
  18.514 +        printk("\nkey=");
  18.515 +        buf_print(esp->digest.key,esp->digest.key_n); 
  18.516 +    }
  18.517 +    crypto_hmac_init(digest, esp->digest.key, &esp->digest.key_n);
  18.518 +    err = skb_scatterlist(skb, sg, &sg_n, offset, len);
  18.519 +    crypto_hmac_update(digest, sg, sg_n);
  18.520 +    crypto_hmac_final(digest, esp->digest.key, &esp->digest.key_n, icv_tmp);
  18.521 +    if(DEBUG_ICV){
  18.522 +        dprintf("> digest len=%d ", esp->digest.icv_n);
  18.523 +        printk("\nval=");
  18.524 +        buf_print(icv_tmp, esp->digest.icv_n);
  18.525 +    }
  18.526 +    memcpy(icv, icv_tmp, esp->digest.icv_n);
  18.527 +    dprintf("<\n");
  18.528 +}
  18.529 +
  18.530 +/** Finish up an esp state.
  18.531 + * Releases the digest, the ICV working buffer, the cipher and the iv, then frees the state.
  18.532 + * The key pointers held in the state are not freed here.
  18.533 + * @param esp state (NULL is accepted and ignored)
  18.534 + */
  18.535 +static void esp_fini(ESPState *esp){
  18.536 +    if(!esp) return;
  18.537 +    if(esp->digest.tfm){
  18.538 +        crypto_free_tfm(esp->digest.tfm);
  18.539 +        esp->digest.tfm = NULL; 
  18.540 +    }
  18.541 +    if(esp->digest.icv_tmp){
  18.542 +        kfree(esp->digest.icv_tmp);
  18.543 +        esp->digest.icv_tmp = NULL;
  18.544 +    }
  18.545 +    if(esp->cipher.tfm){
  18.546 +        crypto_free_tfm(esp->cipher.tfm);
  18.547 +        esp->cipher.tfm = NULL;
  18.548 +    }
  18.549 +    if(esp->cipher.iv){
  18.550 +        kfree(esp->cipher.iv);
  18.551 +        esp->cipher.iv = NULL;
  18.552 +    }
  18.553 +    kfree(esp); // The state itself; esp must not be used after this call.
  18.554 +}
  18.555 +
  18.556 +/** Release an ESP SA.
  18.557 + * Frees the ESP state stored in sa->data (if any) and clears the pointer.
  18.558 + * @param sa ESP SA (NULL is accepted and ignored)
  18.559 + */
  18.560 +static void esp_sa_fini(SAState *sa){
  18.561 +    ESPState *esp;
  18.562 +    if(!sa) return;
  18.563 +    esp = sa->data;
  18.564 +    if(!esp) return;
  18.565 +    esp_fini(esp);
  18.566 +    sa->data = NULL; // Do not leave a pointer to the freed state in the SA.
  18.567 +}
  18.568 +
  18.569 +/** Initialize the cipher for an ESP SA.
  18.570 + *
         + * Checks that the cipher named in the SA is known, allocates its crypto
         + * transform, records the block and IV sizes, creates a random IV when the
         + * cipher uses one, and installs the SA's cipher key on the transform.
         + * On failure anything already stored in esp is left there for the caller
         + * to release (esp_sa_init() does this via esp_fini()).
         + *
  18.571 + * @param sa ESP SA
  18.572 + * @param esp ESP state
  18.573 + * @return 0 on success, negative error code otherwise
  18.574 + */
  18.575 +static int esp_cipher_init(SAState *sa, ESPState *esp){
  18.576 +    int err = 0;
  18.577 +    SAAlgorithm *algo = NULL;
  18.578 +    int cipher_mode = CRYPTO_TFM_MODE_CBC;
  18.579 +
  18.580 +    dprintf("> sa=%p esp=%p\n", sa, esp);
  18.581 +    dprintf("> cipher=%s\n", sa->cipher.name);
  18.582 +    algo = sa_cipher_by_name(sa->cipher.name);
  18.583 +    if(!algo){
  18.584 +        wprintf("> Cipher unavailable: %s\n", sa->cipher.name);
  18.585 +        err = -EINVAL;
  18.586 +        goto exit;
  18.587 +    }
         +    // Use the SA's key (as esp_digest_init() does for the digest key);
         +    // without this the key pointer stays NULL from the zeroed state.
         +    esp->cipher.key = sa->cipher.key;
         +    // Key size is kept in bytes, rounded up from the SA's size in bits.
  18.588 +    esp->cipher.key_n = bits_to_bytes(roundup(sa->cipher.bits, 8));
  18.589 +    // If cipher is null must use ECB because CBC algo does not support blocksize 1.
  18.590 +    if(strcmp(sa->cipher.name, "cipher_null") == 0){
  18.591 +        cipher_mode = CRYPTO_TFM_MODE_ECB;
  18.592 +    }
  18.593 +    esp->cipher.tfm = crypto_alloc_tfm(sa->cipher.name, cipher_mode);
  18.594 +    if(!esp->cipher.tfm){
  18.595 +        err = -ENOMEM;
  18.596 +        goto exit;
  18.597 +    }
  18.598 +    esp->cipher.block_n = roundup(crypto_tfm_alg_blocksize(esp->cipher.tfm), 4);
  18.599 +    esp->cipher.iv_n = crypto_tfm_alg_ivsize(esp->cipher.tfm);
  18.600 +    esp->cipher.pad_n = 0;
  18.601 +    if(esp->cipher.iv_n){
  18.602 +        esp->cipher.iv = kmalloc(esp->cipher.iv_n, GFP_KERNEL);
         +        if(!esp->cipher.iv){
         +            err = -ENOMEM;
         +            goto exit;
         +        }
  18.603 +        get_random_bytes(esp->cipher.iv, esp->cipher.iv_n);
  18.604 +    }
         +    // Report a rejected key instead of continuing with an unkeyed cipher.
  18.605 +    err = crypto_cipher_setkey(esp->cipher.tfm, esp->cipher.key, esp->cipher.key_n);
  18.607 +  exit:
  18.608 +    dprintf("< err=%d\n", err);
  18.609 +    return err;
  18.610 +}
  18.611 +
  18.612 +/** Initialize the digest for an ESP SA.
  18.613 + * On failure anything already stored in esp is left there; this function does not free it.
  18.614 + * @param sa ESP SA
  18.615 + * @param esp ESP state
  18.616 + * @return 0 on success, negative error code otherwise
  18.617 + */
  18.618 +static int esp_digest_init(SAState *sa, ESPState *esp){
  18.619 +    int err = 0;
  18.620 +    SAAlgorithm *algo = NULL;
  18.621 +    
  18.622 +    dprintf(">\n");
  18.623 +    esp->digest.key = sa->digest.key; // Shares the SA's key storage; no copy is made.
  18.624 +    esp->digest.key_n = bits_to_bytes(roundup(sa->digest.bits, 8));
  18.625 +    esp->digest.tfm = crypto_alloc_tfm(sa->digest.name, 0);
  18.626 +    if(!esp->digest.tfm){
  18.627 +        err = -ENOMEM;
  18.628 +        goto exit;
  18.629 +    }
  18.630 +    algo = sa_digest_by_name(sa->digest.name);
  18.631 +    if(!algo){
  18.632 +        wprintf("> Digest unavailable: %s\n", sa->digest.name);
  18.633 +        err = -EINVAL;
  18.634 +        goto exit;
  18.635 +    }
  18.636 +    esp->digest.icv = esp_hmac_digest; // ICVs for this SA are computed with the HMAC routine.
  18.637 +    esp->digest.icv_full_n = bits_to_bytes(algo->info.digest.icv_fullbits);
  18.638 +    esp->digest.icv_n = bits_to_bytes(algo->info.digest.icv_truncbits);
  18.639 +    
  18.640 +    if(esp->digest.icv_full_n != crypto_tfm_alg_digestsize(esp->digest.tfm)){ // Algorithm table and crypto transform must agree on the digest size.
  18.641 +        err = -EINVAL;
  18.642 +        wprintf("> digest %s, size %u != %hu\n",
  18.643 +                sa->digest.name,
  18.644 +                crypto_tfm_alg_digestsize(esp->digest.tfm),
  18.645 +                esp->digest.icv_full_n);
  18.646 +        goto exit;
  18.647 +    }
  18.648 +    
  18.649 +    esp->digest.icv_tmp = kmalloc(esp->digest.icv_full_n, GFP_KERNEL); // Working buffer big enough for the untruncated digest.
  18.650 +    if(!esp->digest.icv_tmp){
  18.651 +        err = -ENOMEM;
  18.652 +        goto exit;
  18.653 +    }
  18.654 +  exit:
  18.655 +    dprintf("< err=%d\n", err);
  18.656 +    return err;
  18.657 +}
  18.658 +
  18.659 +/** Initialize an ESP SA.
  18.660 + * Allocates the ESP state, initializes its cipher and digest, and stores it in sa->data on success.
  18.661 + * @param sa ESP SA
  18.662 + * @param args arguments (not used by this function)
  18.663 + * @return 0 on success, negative error code otherwise
  18.664 + */
  18.665 +static int esp_sa_init(SAState *sa, void *args){
  18.666 +    int err = 0;
  18.667 +    ESPState *esp = NULL;
  18.668 +    
  18.669 +    dprintf("> sa=%p\n", sa);
  18.670 +    esp = kmalloc(sizeof(*esp), GFP_KERNEL);
  18.671 +    if(!esp){
  18.672 +        err = -ENOMEM;
  18.673 +        goto exit;
  18.674 +    }
  18.675 +    *esp = (ESPState){}; // Zero every field so esp_fini() only releases what was actually set up.
  18.676 +    err = esp_cipher_init(sa, esp);
  18.677 +    if(err) goto exit;
  18.678 +    err = esp_digest_init(sa, esp);
  18.679 +    if(err) goto exit;
  18.680 +    sa->data = esp;
  18.681 +  exit:
  18.682 +    if(err){
  18.683 +        if(esp) esp_fini(esp); // Releases a partially initialized state; sa->data has not been set on this path.
  18.684 +    }
  18.685 +    dprintf("< err=%d\n", err);
  18.686 +    return err;
  18.687 +}
  18.688 +
  18.689 +/** SA type for ESP: binds the ESP handlers in this file to IPPROTO_ESP.
  18.690 + */
  18.691 +static SAType esp_sa_type = {
  18.692 +    .name     = "ESP",
  18.693 +    .protocol = IPPROTO_ESP,
  18.694 +    .init     = esp_sa_init, // Creates the per-SA ESP state.
  18.695 +    .fini     = esp_sa_fini, // Frees the per-SA ESP state.
  18.696 +    .size     = esp_sa_size,
  18.697 +    .recv     = esp_sa_recv,
  18.698 +    .send     = esp_sa_send
  18.699 +};
  18.700 +
  18.701 +/** Get the ESP header from a packet.
  18.702 + * The header bytes are made available in the skb's linear data before the pointer is returned.
  18.703 + * @param skb packet (skb->data must point at the ESP header)
  18.704 + * @param esph return parameter for header
  18.705 + * @return 0 on success, negative error code otherwise
  18.706 + */
  18.707 +static int esp_skb_header(struct sk_buff *skb, ESPHdr **esph){
  18.708 +    int err = 0;
  18.709 +    if(!pskb_may_pull(skb, ESP_HDR_N)){ // Packet too short, or the header could not be brought into the linear area.
  18.710 +        err = -EINVAL;
  18.711 +        goto exit;
  18.712 +    }
  18.713 +    *esph = (ESPHdr*)skb->data; // Read after pskb_may_pull(), which may move the data.
  18.714 +  exit:
  18.715 +    return err;
  18.716 +}
  18.717 +
  18.718 +/** Handle an incoming skb with ESP protocol.
  18.719 + *
  18.720 + * Lookup spi, if state found hand to the state.
  18.721 + * If no state, check spi, if ok, create state and pass to it.
  18.722 + * If spi not ok, drop.
  18.723 + * 
  18.724 + * @param skb packet
  18.725 + * @return 0 on success, negative error code otherwise
  18.726 + */
  18.727 +static int esp_protocol_recv(struct sk_buff *skb){
  18.728 +    int err = 0;
  18.729 +    const int eth_n = ETH_HLEN;
  18.730 +    int ip_n;
  18.731 +    ESPHdr *esph = NULL;
  18.732 +    SAState *sa = NULL;
  18.733 +    u32 addr;
  18.734 +    
  18.735 +    dprintf(">\n");
  18.736 +    dprintf("> recv skb=\n"); skb_print_bits(skb, 0, skb->len);
  18.737 +    ip_n = (skb->nh.iph->ihl << 2); // IP header length in bytes (ihl counts 32-bit words).
  18.738 +    if(skb->data == skb->mac.raw){
  18.739 +        // skb->data points at ethernet header.
  18.740 +        if (!pskb_may_pull(skb, eth_n + ip_n)){
  18.741 +            wprintf("> Malformed skb\n");
  18.742 +            err = -EINVAL;
  18.743 +            goto exit;
  18.744 +        }
  18.745 +        skb_pull(skb, eth_n + ip_n); // Step over the ethernet and IP headers to the ESP header.
  18.746 +    }
  18.747 +    addr = skb->nh.iph->daddr;
  18.748 +    err = esp_skb_header(skb, &esph);
  18.749 +    if(err) goto exit;
  18.750 +    dprintf("> spi=%08x protocol=%d addr=" IPFMT "\n",
  18.751 +            esph->spi, IPPROTO_ESP, NIPQUAD(addr));
  18.752 +    sa = sa_table_lookup_spi(esph->spi, IPPROTO_ESP, addr); // The spi is passed exactly as it appears in the header.
  18.753 +    if(!sa){
  18.754 +        err = vnet_sa_create(esph->spi, IPPROTO_ESP, addr, &sa); // No SA known for this spi: try to create one.
  18.755 +        if(err) goto exit;
  18.756 +    }
  18.757 +    err = SAState_recv(sa, skb);
  18.758 +  exit:
  18.759 +    if(sa) SAState_decref(sa); // Drop the reference obtained from the lookup or the create.
  18.760 +    dprintf("< err=%d\n", err);
  18.761 +    return err;
  18.762 +}
  18.763 +
  18.764 +/** Handle an ICMP error related to ESP.
  18.765 + * Only "destination unreachable / fragmentation needed" is acted on, and the only action is a warning when a matching SA exists.
  18.766 + * @param skb ICMP error packet
  18.767 + * @param info (not used by this function)
  18.768 + */
  18.769 +static void esp_protocol_icmp_err(struct sk_buff *skb, u32 info){
  18.770 +    struct iphdr *iph = (struct iphdr*)skb->data;
  18.771 +    ESPHdr *esph;
  18.772 +    SAState *sa;
  18.773 +    
  18.774 +    dprintf("> ICMP error type=%d code=%d\n",
  18.775 +            skb->h.icmph->type, skb->h.icmph->code);
  18.776 +    if(skb->h.icmph->type != ICMP_DEST_UNREACH ||
  18.777 +       skb->h.icmph->code != ICMP_FRAG_NEEDED){
  18.778 +        return;
  18.779 +    }
  18.780 +    
  18.781 +    //todo: need to check skb has enough len to do this.
  18.782 +    esph = (ESPHdr*)(skb->data + (iph->ihl << 2)); // ESP header follows the IP header found at skb->data.
  18.783 +    sa = sa_table_lookup_spi(esph->spi, IPPROTO_ESP, iph->daddr);
  18.784 +    if(!sa) return;
  18.785 +    wprintf("> ICMP unreachable on SA ESP spi=%08x addr=" IPFMT "\n",
  18.786 +            ntohl(esph->spi), NIPQUAD(iph->daddr)); // The spi is byte-swapped for printing only; the lookup above used it as stored.
  18.787 +    SAState_decref(sa);
  18.788 +}
  18.789 +
  18.790 +//============================================================================
  18.791 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  18.792 +// Code for 2.6 kernel.
  18.793 +
  18.794 +/** Protocol handler for ESP.
  18.795 + */
  18.796 +static struct net_protocol esp_protocol = {
  18.797 +    .handler     = esp_protocol_recv,
  18.798 +    .err_handler = esp_protocol_icmp_err
  18.799 +};
  18.800 +
  18.801 +static int esp_protocol_add(void){ // Register the handler for IPPROTO_ESP; returns inet_add_protocol()'s result.
  18.802 +    return inet_add_protocol(&esp_protocol, IPPROTO_ESP);
  18.803 +}
  18.804 +
  18.805 +static int esp_protocol_del(void){ // Unregister the handler; returns inet_del_protocol()'s result.
  18.806 +    return inet_del_protocol(&esp_protocol, IPPROTO_ESP);
  18.807 +}
  18.808 +
  18.809 +//============================================================================
  18.810 +#else
  18.811 +//============================================================================
  18.812 +// Code for 2.4 kernel.
  18.813 +
  18.814 +/** Protocol handler for ESP.
  18.815 + */
  18.816 +static struct inet_protocol esp_protocol = {
  18.817 +    .name        = "ESP",
  18.818 +    .protocol    = IPPROTO_ESP,
  18.819 +    .handler     = esp_protocol_recv,
  18.820 +    .err_handler = esp_protocol_icmp_err
  18.821 +};
  18.822 +
  18.823 +static int esp_protocol_add(void){ // Register the handler; this variant always reports success.
  18.824 +    inet_add_protocol(&esp_protocol);
  18.825 +    return 0;
  18.826 +}
  18.827 +
  18.828 +static int esp_protocol_del(void){ // Unregister the handler; returns inet_del_protocol()'s result.
  18.829 +    return inet_del_protocol(&esp_protocol);
  18.830 +}
  18.831 +
  18.832 +#endif
  18.833 +//============================================================================
  18.834 +
  18.835 +
  18.836 +/** Initialize the ESP module.
  18.837 + * Registers the ESP protocol and SA type.
         + * If the protocol handler cannot be registered, the SA type is removed
         + * again so that a failed init leaves nothing registered.
  18.838 + *
  18.839 + * @return 0 on success, negative error code otherwise
  18.840 + */
  18.841 +int __init esp_module_init(void){
  18.842 +    int err = 0;
  18.843 +    dprintf(">\n");
  18.844 +    err = SAType_add(&esp_sa_type);
  18.845 +    if(err < 0){
  18.846 +        eprintf("> Error adding esp sa type\n");
  18.847 +        goto exit;
  18.848 +    }
  18.849 +    err = esp_protocol_add();
         +    if(err < 0){
         +        eprintf("> Error adding esp protocol\n");
         +        // Undo the SA type registration done above.
         +        SAType_del(&esp_sa_type);
         +    }
  18.850 +  exit:
  18.851 +    dprintf("< err=%d\n", err);
  18.852 +    return err;
  18.853 +}
  18.854 +
  18.855 +/** Finalize the ESP module.
  18.856 + * Deregisters the ESP protocol and SA type. Failures are logged; both removals are always attempted.
  18.857 + */
  18.858 +void __exit esp_module_exit(void){
  18.859 +    if(esp_protocol_del() < 0){
  18.860 +        eprintf("> Error removing esp protocol\n");
  18.861 +    }
  18.862 +    if(SAType_del(&esp_sa_type) < 0){
  18.863 +        eprintf("> Error removing esp sa type\n");
  18.864 +    }
  18.865 +}
  18.866 +
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/tools/vnet/vnet-module/esp.h	Mon Nov 22 16:49:15 2004 +0000
    19.3 @@ -0,0 +1,111 @@
    19.4 +/*
    19.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    19.6 + *
    19.7 + * This program is free software; you can redistribute it and/or modify
    19.8 + * it under the terms of the GNU General Public License as published by the 
    19.9 + * Free Software Foundation; either version 2 of the License, or (at your
   19.10 + * option) any later version.
   19.11 + * 
   19.12 + * This program is distributed in the hope that it will be useful, but
   19.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   19.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   19.15 + * for more details.
   19.16 + *
   19.17 + * You should have received a copy of the GNU General Public License along
   19.18 + * with this program; if not, write to the Free Software Foundation, Inc.,
   19.19 + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
   19.20 + *
   19.21 + */
   19.22 +#ifndef __VNET_ESP_H__
   19.23 +#define __VNET_ESP_H__
   19.24 +
   19.25 +#include <linux/config.h>
   19.26 +#include <linux/types.h>
   19.27 +#include <linux/crypto.h>
   19.28 +
   19.29 +/** Header used by IPSEC ESP (Encapsulated Security Payload). */
   19.30 +typedef struct ESPHdr {
   19.31 +    /** The spi (security parameters index). */
   19.32 +    u32 spi; /* presumably in network byte order as read from the packet -- TODO confirm */
   19.33 +    /** Sequence number. */
   19.34 +    u32 seq;
   19.35 +    /* Variable length data (depends on crypto suite).
   19.36 +       Mind the 64 bit alignment! */
   19.37 +    u8  data[0]; /* Zero-length array: marks where the data starts and adds nothing to sizeof(ESPHdr). */
   19.38 +} ESPHdr;
   19.39 +
   19.40 +/** Padding trailer used by IPSEC ESP.
   19.41 + * Follows the padding itself with the padding length and the
   19.42 + * protocol being encapsulated.
   19.43 + */
   19.44 +typedef struct ESPPadding {
   19.45 +    u8 pad_n;    /* Padding length. */
   19.46 +    u8 protocol; /* Protocol being encapsulated. */
   19.47 +} ESPPadding;
   19.48 +
   19.49 +/** Size of the esp header (spi and seq). */
   19.50 +static const int ESP_HDR_N = sizeof(ESPHdr);
   19.51 +
   19.52 +/** Size of the esp pad and next protocol field. */
   19.53 +static const int ESP_PAD_N = sizeof(ESPPadding);
   19.54 +
   19.55 +enum { /* State codes; presumably the lifecycle states of an SA -- TODO confirm where they are used. */
   19.56 +    SASTATE_VOID,
   19.57 +    SASTATE_ACQUIRE,
   19.58 +    SASTATE_VALID,
   19.59 +    SASTATE_ERROR,
   19.60 +    SASTATE_EXPIRED,
   19.61 +    SASTATE_DEAD,
   19.62 +};
   19.63 +
   19.64 +struct ESPState;
   19.65 +
   19.66 +/** A cipher instance: key, IV, size parameters and the crypto transform used for encryption. */
   19.67 +typedef struct ESPCipher {
   19.68 +    /** Cipher key. */
   19.69 +    u8 *key;
   19.70 +    /** Key size (bytes). */
   19.71 +    int key_n;
   19.72 +    /** Initialization vector (IV). */
   19.73 +    u8 *iv;
   19.74 +    /** IV size (bytes). */
   19.75 +    int iv_n;
   19.76 +    /** Block size for padding (bytes). */
   19.77 +    int pad_n;
   19.78 +    /** Cipher block size (bytes). */
   19.79 +    int block_n;
   19.80 +    /** Cipher crypto transform. */
   19.81 +    struct crypto_tfm *tfm;
   19.82 +} ESPCipher;
   19.83 +
   19.84 +/** A digest instance: key, ICV sizes, working buffer, ICV function and the crypto transform. */
   19.85 +typedef struct ESPDigest {
   19.86 +    /** Digest key. */
   19.87 +    u8 *key;
   19.88 +    /** Key size (bytes) */
   19.89 +    int key_n;
   19.90 +    /** ICV size used (bytes). */
   19.91 +    u8 icv_n;
   19.92 +    /** Full ICV size when computed (bytes). */
   19.93 +    u8 icv_full_n;
   19.94 +    /** Working storage for computing ICV. */
   19.95 +    u8 *icv_tmp;
   19.96 +    /** Function used to compute ICV (e.g. HMAC). Returns nothing; the result is delivered through the icv argument. */
   19.97 +    void (*icv)(struct ESPState *esp,
   19.98 +                struct sk_buff *skb,
   19.99 +                int offset,
  19.100 +                int len,
  19.101 +                u8 *icv);
  19.102 +    /** Digest crypto transform (e.g. SHA). */
  19.103 +    struct crypto_tfm *tfm;
  19.104 +} ESPDigest;
  19.105 +
  19.106 +typedef struct ESPState { /* ESP state: one cipher instance and one digest instance. */
  19.107 +    struct ESPCipher cipher;
  19.108 +    struct ESPDigest digest;
  19.109 +} ESPState;
  19.110 +
  19.111 +extern int esp_module_init(void);  /* Module set-up; returns an int status. */
  19.112 +extern void esp_module_exit(void); /* Module tear-down. */
  19.113 +
  19.114 +#endif /* !__VNET_ESP_H__ */
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/tools/vnet/vnet-module/etherip.c	Mon Nov 22 16:49:15 2004 +0000
    20.3 @@ -0,0 +1,411 @@
    20.4 +/*
    20.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    20.6 + *
    20.7 + * This program is free software; you can redistribute it and/or modify
    20.8 + * it under the terms of the GNU General Public License as published by the 
    20.9 + * Free Software Foundation; either version 2 of the License, or (at your
   20.10 + * option) any later version.
   20.11 + * 
   20.12 + * This program is distributed in the hope that it will be useful, but
   20.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   20.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   20.15 + * for more details.
   20.16 + *
   20.17 + * You should have received a copy of the GNU General Public License along
   20.18 + * with this program; if not, write to the Free Software Foundation, Inc.,
   20.19 + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
   20.20 + *
   20.21 + */
   20.22 +#include <linux/config.h>
   20.23 +#include <linux/module.h>
   20.24 +#include <linux/types.h>
   20.25 +#include <linux/kernel.h>
   20.26 +#include <linux/init.h>
   20.27 +
   20.28 +#include <linux/version.h>
   20.29 +
   20.30 +#include <linux/skbuff.h>
   20.31 +#include <linux/net.h>
   20.32 +#include <linux/netdevice.h>
   20.33 +#include <linux/in.h>
   20.34 +#include <linux/inet.h>
   20.35 +#include <linux/netfilter_ipv4.h>
   20.36 +#include <linux/icmp.h>
   20.37 +
   20.38 +#include <net/ip.h>
   20.39 +#include <net/protocol.h>
   20.40 +#include <net/route.h>
   20.41 +#include <net/checksum.h>
   20.42 +
   20.43 +#include <etherip.h>
   20.44 +#include <tunnel.h>
   20.45 +#include <vnet.h>
   20.46 +#include <varp.h>
   20.47 +#include <if_varp.h>
   20.48 +#include <skb_util.h>
   20.49 +
   20.50 +#define MODULE_NAME "VNET"
   20.51 +//#define DEBUG 1
   20.52 +#undef DEBUG
   20.53 +#include "debug.h"
   20.54 +
   20.55 +/** @file Etherip implementation.
   20.56 + * The etherip protocol is used to transport Ethernet frames in IP packets.
   20.57 + */
   20.58 +
   20.59 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) // MAC_ETH(skb): the skb's MAC header viewed as an ethernet header.
   20.60 +#define MAC_ETH(_skb) ((struct ethhdr *)(_skb)->mac.raw)
   20.61 +#else // Older kernels: use the mac.ethernet member directly.
   20.62 +#define MAC_ETH(_skb) ((_skb)->mac.ethernet)
   20.63 +#endif
   20.64 +
   20.65 +/** Get the vnet label from an etherip header.
   20.66 + * With CONFIG_ETHERIP_EXT the label is the header's vnet field; otherwise it is the reserved field.
   20.67 + * @param hdr header
   20.68 + * @return vnet (in host order)
   20.69 + */
   20.70 +int etheriphdr_get_vnet(struct etheriphdr *hdr){
   20.71 +#ifdef CONFIG_ETHERIP_EXT
   20.72 +    return ntohl(hdr->vnet);
   20.73 +#else
   20.74 +    return hdr->reserved; // Returned as stored, with no byte-order conversion.
   20.75 +#endif
   20.76 +}
   20.77 +
   20.78 +/** Set the vnet label in an etherip header.
   20.79 + * Also sets the etherip version (4 with CONFIG_ETHERIP_EXT, 3 otherwise).
   20.80 + *
   20.81 + * @param hdr header
   20.82 + * @param vnet vnet label (in host order)
   20.83 + */
   20.84 +void etheriphdr_set_vnet(struct etheriphdr *hdr, int vnet){
   20.85 +#ifdef CONFIG_ETHERIP_EXT
   20.86 +    hdr->version = 4;
   20.87 +    hdr->vnet = htonl(vnet);
   20.88 +#else
   20.89 +    hdr->version = 3;
   20.90 +    hdr->reserved = vnet & 0x0fff; // Only the low 12 bits of the label are kept.
   20.91 +#endif
   20.92 +}
   20.93 +
   20.94 +/** Open an etherip tunnel.
   20.95 + * Nothing needs to be set up, so this always succeeds.
   20.96 + * @param tunnel to open
   20.97 + * @return 0 on success, error code otherwise
   20.98 + */
   20.99 +static int etherip_tunnel_open(Tunnel *tunnel){
  20.100 +    return 0;
  20.101 +}
  20.102 +
  20.103 +/** Close an etherip tunnel.
  20.104 + * Does nothing: an etherip tunnel holds no state that needs releasing here.
  20.105 + * @param tunnel to close
  20.106 + */
  20.107 +static void etherip_tunnel_close(Tunnel *tunnel){
  20.108 +}
  20.109 +
  20.110 +
  20.111 +/** Send a packet via an etherip tunnel.
  20.112 + * Pushes an etherip header and a new ip header in front of the ethernet
  20.113 + * frame, then passes the packet to the base tunnel. Room is also reserved for an outer ethernet header, which is left for the device to fill in.
  20.114 + *
  20.115 + * @param tunnel tunnel
  20.116 + * @param skb packet
  20.117 + * @return 0 on success, error code otherwise
  20.118 + */
  20.119 +static int etherip_tunnel_send(Tunnel *tunnel, struct sk_buff *skb){
  20.120 +    int err = 0;
  20.121 +    const int etherip_n = sizeof(struct etheriphdr);
  20.122 +    const int ip_n = sizeof(struct iphdr);
  20.123 +    const int eth_n = ETH_HLEN;
  20.124 +    int head_n = 0;
  20.125 +    int vnet = tunnel->key.vnet;
  20.126 +    struct etheriphdr *etheriph;
  20.127 +    struct ethhdr *ethh;
  20.128 +    u32 saddr = 0; // Stays 0: the device address lookup below is commented out.
  20.129 +
  20.130 +    dprintf("> skb=%p vnet=%d\n", skb, vnet);
  20.131 +    head_n = etherip_n + ip_n + eth_n; // Headroom requested for all three outer headers.
  20.132 +    err = skb_make_room(&skb, skb, head_n, 0);
  20.133 +    if(err) goto exit;
  20.134 +
  20.135 +    //err = vnet_get_device_address(skb->dev, &saddr);
  20.136 +    //if(err) goto exit;
  20.137 +    
  20.138 +    // The original ethernet header.
  20.139 +    ethh = MAC_ETH(skb); // NOTE(review): ethh is not read after this assignment.
  20.140 +    //print_skb_data(__FUNCTION__, 0, skb, skb->mac.raw, skb->len);
  20.141 +    // Null the pointer as we are pushing a new IP header.
  20.142 +    skb->mac.raw = NULL;
  20.143 +
  20.144 +    // Setup the etherip header.
  20.145 +    //dprintf("> push etherip header...\n");
  20.146 +    etheriph = (struct etheriphdr *)skb_push(skb, etherip_n);
  20.147 +    etheriphdr_set_vnet(etheriph, vnet);
  20.148 +
  20.149 +    // Setup the IP header.
  20.150 +    //dprintf("> push IP header...\n");
  20.151 +    skb->nh.raw = skb_push(skb, ip_n); 
  20.152 +    skb->nh.iph->version  = 4;			// Standard version.
  20.153 +    skb->nh.iph->ihl      = ip_n / 4;		// IP header length (32-bit words).
  20.154 +    skb->nh.iph->tos      = 0;			// No special type-of-service.
  20.155 +    skb->nh.iph->tot_len  = htons(skb->len);    // Total packet length (bytes).
  20.156 +    skb->nh.iph->id       = 0;			// No flow id (since no frags).
  20.157 +    skb->nh.iph->frag_off = htons(IP_DF);	// Don't fragment - can't handle frags.
  20.158 +    skb->nh.iph->ttl      = 64;			// Linux default time-to-live.
  20.159 +    skb->nh.iph->protocol = IPPROTO_ETHERIP;    // IP protocol number.
  20.160 +    skb->nh.iph->saddr    = saddr;		// Source address.
  20.161 +    skb->nh.iph->daddr    = tunnel->key.addr;	// Destination address.
  20.162 +    skb->nh.iph->check    = 0;			// Checksum is left as 0 here.
  20.163 +
  20.164 +    // Ethernet header will be filled-in by device.
  20.165 +    err = Tunnel_send(tunnel->base, skb);
  20.166 +    skb = NULL; // Handed to the base tunnel: the exit path must not free it.
  20.167 +  exit:
  20.168 +    if(err && skb) dev_kfree_skb(skb); // Frees the packet only when it was not handed on.
  20.169 +    //dprintf("< err=%d\n", err);
  20.170 +    return err;
  20.171 +}
  20.172 +
  20.173 +/** Tunnel type for etherip: names the type and binds the open/close/send handlers in this file.
  20.174 + */
  20.175 +static TunnelType _etherip_tunnel_type = {
  20.176 +    .name	= "ETHERIP",
  20.177 +    .open	= etherip_tunnel_open,
  20.178 +    .close	= etherip_tunnel_close,
  20.179 +    .send 	= etherip_tunnel_send
  20.180 +};
  20.181 +
  20.182 +TunnelType *etherip_tunnel_type = &_etherip_tunnel_type; // Exported pointer to the static type above.
  20.183 +
  20.184 +/* Defeat compiler warnings about unused functions. print_str() prints n bytes of s with printk: letters and digits as characters, every other byte as <hex>. */
  20.185 +static void print_str(char *s, int n) __attribute__((unused));
  20.186 +
  20.187 +static void print_str(char *s, int n) {
  20.188 +    int i;
  20.189 +
  20.190 +    for(i=0; i<n; s++, i++){
  20.191 +        if(i && i % 40 == 0) printk("\n"); // Line break after every 40 bytes.
  20.192 +        if(('a'<= *s && *s <= 'z') ||
  20.193 +           ('A'<= *s && *s <= 'Z') ||
  20.194 +           ('0'<= *s && *s <= '9')){
  20.195 +            printk("%c", *s);
  20.196 +        } else {
  20.197 +            printk("<%x>", (unsigned)(0xff & *s)); // Mask to one byte so the hex value is never sign-extended.
  20.198 +        }
  20.199 +    }
  20.200 +    printk("\n");
  20.201 +}
  20.202 +
  20.203 +/** Do etherip receive processing.
  20.204 + * Strips etherip header to extract the ethernet frame, sets
  20.205 + * the vnet from the header and re-receives the frame.
         + * The etherip header and the inner ethernet header must both be present;
         + * shorter packets are rejected as malformed. Multicast packets not sent
         + * to the varp multicast address are ignored.
  20.206 + *
  20.207 + * @param skb packet
  20.208 + * @return 0 on success, error code otherwise; 1 once the packet has passed the vnet security check
  20.209 + */
  20.210 +static int etherip_protocol_recv(struct sk_buff *skb){
  20.211 +    int err = 0;
  20.212 +    int mine = 0;
  20.213 +    const int eth_n = ETH_HLEN;
  20.214 +    int ip_n;
  20.215 +    const int etherip_n = sizeof(struct etheriphdr);
  20.216 +    struct etheriphdr *etheriph;
  20.217 +    struct ethhdr *ethhdr;
  20.218 +    Vnet *vinfo = NULL;
  20.219 +    u32 vnet;
  20.220 +
  20.221 +    ethhdr = MAC_ETH(skb);
  20.222 +    if(MULTICAST(skb->nh.iph->daddr) &&
  20.223 +       (skb->nh.iph->daddr != varp_mcast_addr)){
  20.224 +        // Ignore multicast packets not addressed to us.
  20.225 +        dprintf("> dst=%u.%u.%u.%u varp_mcast_addr=%u.%u.%u.%u\n",
  20.226 +                NIPQUAD(skb->nh.iph->daddr),
  20.227 +                NIPQUAD(varp_mcast_addr));
  20.228 +        goto exit;
  20.229 +    }
  20.230 +    ip_n = (skb->nh.iph->ihl << 2);
  20.231 +    if(skb->data == skb->mac.raw){
  20.232 +        // skb->data points at ethernet header.
  20.233 +        //dprintf("> len=%d\n", skb->len);
  20.234 +        if (!pskb_may_pull(skb, eth_n + ip_n)){
  20.235 +            wprintf("> Malformed skb\n");
  20.236 +            err = -EINVAL;
  20.237 +            goto exit;
  20.238 +        }
  20.239 +        skb_pull(skb, eth_n + ip_n);
  20.240 +    }
  20.241 +    // Assume skb->data points at etherip header. Need it and the inner ethernet header in the linear data.
  20.242 +    if(!pskb_may_pull(skb, etherip_n + eth_n)){
  20.243 +        wprintf("> Malformed skb\n");
  20.244 +        err = -EINVAL;
  20.245 +        goto exit;
  20.246 +    }
  20.247 +    etheriph = (void*)skb->data; // Taken only after pskb_may_pull(), which may move the data.
  20.248 +    vnet = etheriphdr_get_vnet(etheriph);
  20.249 +    dprintf("> Rcvd skb=%p vnet=%d\n", skb, vnet);
  20.250 +    // If vnet is secure, context must include IPSEC ESP.
  20.251 +    err = vnet_check_context(vnet, SKB_CONTEXT(skb), &vinfo);
  20.252 +    Vnet_decref(vinfo);
  20.253 +    if(err){
  20.254 +        wprintf("> Failed security check\n");
  20.255 +        goto exit;
  20.256 +    }
  20.257 +    mine = 1;
  20.258 +    // Point at the headers in the contained ethernet frame.
  20.259 +    skb->mac.raw = skb_pull(skb, etherip_n);
  20.260 +
  20.261 +    // Know source ip, vnet, vmac, so could update varp cache.
  20.262 +    // But if traffic comes to us over a vnetd tunnel this points the coa
  20.263 +    // at the vnetd rather than the endpoint. So don't do it.
  20.264 +    //varp_update(htonl(vnet), MAC_ETH(skb)->h_source, skb->nh.iph->saddr);
  20.265 +
  20.266 +    // Assuming a standard Ethernet frame.
  20.267 +    skb->nh.raw = skb_pull(skb, ETH_HLEN);
  20.268 +
  20.269 +#ifdef CONFIG_NETFILTER
  20.270 +#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
  20.271 +    // This stops our new pkt header being clobbered by a subsequent
  20.272 +    // call to nf_bridge_maybe_copy_header. Just replicate the
  20.273 +    // corresponding nf_bridge_save_header.
  20.274 +    if(skb->nf_bridge){
  20.275 +        int header_size = 16;
  20.276 +        if(MAC_ETH(skb)->h_proto == __constant_htons(ETH_P_8021Q)) {
  20.277 +            header_size = 18;
  20.278 +        }
  20.279 +        memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
  20.280 +    }
  20.281 +#endif
  20.282 +#endif
  20.283 +    
  20.284 +    if(1){
  20.285 +	struct ethhdr *eth = MAC_ETH(skb);
  20.286 +        // Devices use eth_type_trans() to set skb->pkt_type and skb->protocol.
  20.287 +        // Set them from contained ethhdr, or leave as received?
  20.288 +        // 'Ware use of hard_header_len in eth_type_trans().
  20.289 +
  20.290 +        //skb->protocol = htons(ETH_P_IP);
  20.291 +
  20.292 +        if(ntohs(eth->h_proto) >= 1536){
  20.293 +            skb->protocol = eth->h_proto;
  20.294 +        } else {
  20.295 +            skb->protocol = htons(ETH_P_802_2);
  20.296 +        }
  20.297 +        
  20.298 +	if(mac_is_multicast(eth->h_dest)){
  20.299 +            if(mac_is_broadcast(eth->h_dest)){
  20.300 +                skb->pkt_type = PACKET_BROADCAST;
  20.301 +	    } else {
  20.302 +                skb->pkt_type = PACKET_MULTICAST;
  20.303 +            }
  20.304 +        } else {
  20.305 +            skb->pkt_type = PACKET_HOST;
  20.306 +	}
  20.307 +
  20.308 +        memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
  20.309 +        if (skb->ip_summed == CHECKSUM_HW){
  20.310 +            skb->ip_summed = CHECKSUM_NONE;
  20.311 +            //skb->csum = csum_sub(skb->csum,
  20.312 +            //                     csum_partial(skb->mac.raw, skb->nh.raw - skb->mac.raw, 0));
  20.313 +        }
  20.314 +        dst_release(skb->dst);
  20.315 +        skb->dst = NULL;
  20.316 +#ifdef CONFIG_NETFILTER
  20.317 +        nf_conntrack_put(skb->nfct);
  20.318 +        skb->nfct = NULL;
  20.319 +#ifdef CONFIG_NETFILTER_DEBUG
  20.320 +        skb->nf_debug = 0;
  20.321 +#endif
  20.322 +#endif
  20.323 +    }
  20.324 +
  20.325 +    //print_skb_data(__FUNCTION__, 0, skb, skb->mac.raw, skb->len + ETH_HLEN);
  20.326 +
  20.327 +    err = vnet_skb_recv(skb, vnet, (Vmac*)MAC_ETH(skb)->h_dest);
  20.328 +  exit:
  20.329 +    if(mine) err = 1; // Once accepted as ours the result is always 1, whatever vnet_skb_recv() returned.
  20.330 +    dprintf("< skb=%p err=%d\n", skb, err);
  20.331 +    return err;
  20.332 +}
  20.333 +
  20.334 +/** Handle an ICMP error related to etherip.
  20.335 + * Logs every error; "fragmentation needed" errors get a second, MTU-specific warning. No other action is taken.
  20.336 + * @param skb ICMP error packet
  20.337 + * @param info (not used by this function)
  20.338 + */
  20.339 +static void etherip_protocol_icmp_err(struct sk_buff *skb, u32 info){
  20.340 +    struct iphdr *iph = (struct iphdr*)skb->data;
  20.341 +    
  20.342 +    wprintf("> ICMP error type=%d code=%d addr=" IPFMT "\n",
  20.343 +            skb->h.icmph->type, skb->h.icmph->code, NIPQUAD(iph->daddr));
  20.344 +
  20.345 +    if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
  20.346 +        skb->h.icmph->code != ICMP_FRAG_NEEDED){
  20.347 +        return;
  20.348 +    }
  20.349 +    wprintf("> MTU too big addr= " IPFMT "\n", NIPQUAD(iph->daddr)); 
  20.350 +}
  20.351 +
  20.352 +//============================================================================
  20.353 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  20.354 +// Code for 2.6 kernel.
  20.355 +
  20.356 +/** Etherip protocol. */
  20.357 +static struct net_protocol etherip_protocol = {
  20.358 +    .handler	 = etherip_protocol_recv,
  20.359 +    .err_handler = etherip_protocol_icmp_err,
  20.360 +};
  20.361 +
  20.362 +static int etherip_protocol_add(void){ // Register the handler for IPPROTO_ETHERIP; returns inet_add_protocol()'s result.
  20.363 +    return inet_add_protocol(&etherip_protocol, IPPROTO_ETHERIP);
  20.364 +}
  20.365 +
  20.366 +static int etherip_protocol_del(void){ // Unregister the handler; returns inet_del_protocol()'s result.
  20.367 +    return inet_del_protocol(&etherip_protocol, IPPROTO_ETHERIP);
  20.368 +}
  20.369 +
  20.370 +//============================================================================
  20.371 +#else
  20.372 +//============================================================================
  20.373 +// Code for 2.4 kernel.
  20.374 +
  20.375 +/** Etherip protocol. */
  20.376 +static struct inet_protocol etherip_protocol = {
  20.377 +    .name        = "ETHERIP",
  20.378 +    .protocol    = IPPROTO_ETHERIP,
  20.379 +    .handler	 = etherip_protocol_recv,
  20.380 +    .err_handler = etherip_protocol_icmp_err,
  20.381 +};
  20.382 +
  20.383 +static int etherip_protocol_add(void){ // Register the handler; this variant always reports success.
  20.384 +    inet_add_protocol(&etherip_protocol);
  20.385 +    return 0;
  20.386 +}
  20.387 +
  20.388 +static int etherip_protocol_del(void){ // Unregister the handler; returns inet_del_protocol()'s result.
  20.389 +    return inet_del_protocol(&etherip_protocol);
  20.390 +}
  20.391 +
  20.392 +#endif
  20.393 +//============================================================================
  20.394 +
  20.395 +
  20.396 +/** Initialize the etherip module.
  20.397 + * Registers the etherip protocol and reports the outcome to the caller.
  20.398 + *
  20.399 + * @return 0 on success, error code otherwise
  20.400 + */
  20.401 +int __init etherip_module_init(void) {
  20.402 +    /* Hand back the registration result rather than always reporting success. */
  20.403 +    int err = etherip_protocol_add();
  20.404 +    return err;
  20.405 +}
  20.406 +
  20.407 +/** Finalize the etherip module.
  20.408 + * Deregisters the etherip protocol and logs a message if that fails.
  20.409 + */
  20.410 +void __exit etherip_module_exit(void) {
  20.411 +    int err = etherip_protocol_del();
  20.412 +    if(err >= 0) return;
  20.413 +    printk(KERN_INFO "%s: can't remove etherip protocol\n", __FUNCTION__);
  20.414 +}
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/tools/vnet/vnet-module/etherip.h	Mon Nov 22 16:49:15 2004 +0000
    21.3 @@ -0,0 +1,27 @@
    21.4 +/*
    21.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    21.6 + *
    21.7 + * This program is free software; you can redistribute it and/or modify
    21.8 + * it under the terms of the GNU General Public License as published by the 
    21.9 + * Free Software Foundation; either version 2 of the License, or (at your
   21.10 + * option) any later version.
   21.11 + * 
   21.12 + * This program is distributed in the hope that it will be useful, but
   21.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   21.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   21.15 + * for more details.
   21.16 + *
   21.17 + * You should have received a copy of the GNU General Public License along
   21.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   21.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   21.20 + *
   21.21 + */
   21.22 +#ifndef _VNET_ETHERIP_H_
   21.23 +#define _VNET_ETHERIP_H_
   21.24 +
   21.25 +#include "if_etherip.h"
   21.26 +
   21.27 +extern int etherip_module_init(void);
   21.28 +extern void etherip_module_exit(void);
   21.29 +
   21.30 +#endif
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/tools/vnet/vnet-module/if_etherip.h	Mon Nov 22 16:49:15 2004 +0000
    22.3 @@ -0,0 +1,51 @@
    22.4 +/*
    22.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    22.6 + *
    22.7 + * This program is free software; you can redistribute it and/or modify
    22.8 + * it under the terms of the GNU General Public License as published by the 
    22.9 + * Free Software Foundation; either version 2 of the License, or (at your
   22.10 + * option) any later version.
   22.11 + * 
   22.12 + * This program is distributed in the hope that it will be useful, but
   22.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   22.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   22.15 + * for more details.
   22.16 + *
   22.17 + * You should have received a copy of the GNU General Public License along
   22.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   22.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   22.20 + *
   22.21 + */
   22.22 +#ifndef _VNET_IF_ETHERIP_H_
   22.23 +#define _VNET_IF_ETHERIP_H_
   22.24 +/** Etherip header. With CONFIG_ETHERIP_EXT: 8-bit version and 32-bit vnet id, packed. */
   22.25 +#ifdef CONFIG_ETHERIP_EXT
   22.26 +struct etheriphdr {
   22.27 +    __u8 version;
   22.28 +    __u32 vnet;
   22.29 +} __attribute__ ((packed));
   22.30 +
   22.31 +/* Without CONFIG_ETHERIP_EXT: one 16-bit word, a 4-bit version plus 12 reserved bits. */
   22.32 +#else
   22.33 +struct etheriphdr
   22.34 +{
   22.35 +#if defined(__LITTLE_ENDIAN_BITFIELD)
   22.36 +    __u16    reserved:12,
   22.37 +             version:4;
   22.38 +#elif defined (__BIG_ENDIAN_BITFIELD)
   22.39 +    __u16    version:4,
   22.40 +            reserved:12;
   22.41 +#else
   22.42 +#error  "Please fix <asm/byteorder.h>"
   22.43 +#endif
   22.44 +    /* NOTE(review): with a 16-bit container, version looks to land in the second byte on little-endian hosts - confirm wire layout. */
   22.45 +};
   22.46 +#endif
   22.47 +
   22.48 +#ifndef IPPROTO_ETHERIP
   22.49 +#define IPPROTO_ETHERIP 97
   22.50 +#endif
   22.51 +
   22.52 +/*----------------------------------------------------------------------------*/
   22.53 +
   22.54 +#endif /* ! _VNET_IF_ETHERIP_H_ */
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/tools/vnet/vnet-module/if_varp.h	Mon Nov 22 16:49:15 2004 +0000
    23.3 @@ -0,0 +1,53 @@
    23.4 +/*
    23.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    23.6 + *
    23.7 + * This program is free software; you can redistribute it and/or modify
    23.8 + * it under the terms of the GNU General Public License as published by the 
    23.9 + * Free Software Foundation; either version 2 of the License, or (at your
   23.10 + * option) any later version.
   23.11 + * 
   23.12 + * This program is distributed in the hope that it will be useful, but
   23.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   23.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   23.15 + * for more details.
   23.16 + *
   23.17 + * You should have received a copy of the GNU General Public License along
   23.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   23.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   23.20 + *
   23.21 + */
   23.22 +
   23.23 +#ifndef _VNET_IF_VARP_H
   23.24 +#define _VNET_IF_VARP_H
   23.25 +/** MAC address: ETH_ALEN bytes wrapped in a struct. */
   23.26 +typedef struct Vmac {
   23.27 +    unsigned char mac[ETH_ALEN];
   23.28 +} Vmac;
   23.29 +/** Varp constants; presumably the VnetMsgHdr id and opcode values - confirm against users. */
   23.30 +enum {
   23.31 +    VARP_ID          = 1,
   23.32 +    VARP_OP_REQUEST  = 1,
   23.33 +    VARP_OP_ANNOUNCE = 2,
   23.34 +};
   23.35 +/** Message header: a 16-bit id followed by a 16-bit opcode, packed. */
   23.36 +typedef struct VnetMsgHdr {
   23.37 +    uint16_t id;
   23.38 +    uint16_t opcode;
   23.39 +} __attribute__((packed)) VnetMsgHdr;
   23.40 +/** Varp message: message header, 32-bit vnet id, MAC address and 32-bit address, packed. */
   23.41 +typedef struct VarpHdr {
   23.42 +    VnetMsgHdr; /* NOTE(review): unnamed member via a typedef name; looks compiler-extension dependent - confirm build flags */
   23.43 +    uint32_t vnet;
   23.44 +    Vmac vmac;
   23.45 +    uint32_t addr;
   23.46 +} __attribute__((packed)) VarpHdr;
   23.47 +
   23.48 +/** Default address for varp/vnet broadcasts: 224.10.0.1 */
   23.49 +#define VARP_MCAST_ADDR     0xe00a0001
   23.50 +
   23.51 +/** UDP port to use for varp protocol. */
   23.52 +#define VARP_PORT           1798
   23.53 +
   23.54 +
   23.55 +
   23.56 +#endif /* ! _VNET_IF_VARP_H */
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/tools/vnet/vnet-module/linux/pfkeyv2.h	Mon Nov 22 16:49:15 2004 +0000
    24.3 @@ -0,0 +1,329 @@
    24.4 +/* PF_KEY user interface, this is defined by rfc2367 so
    24.5 + * do not make arbitrary modifications or else this header
    24.6 + * file will not be compliant.
    24.7 + */
    24.8 +
    24.9 +#ifndef _LINUX_PFKEY2_H
   24.10 +#define _LINUX_PFKEY2_H
   24.11 +
   24.12 +#include <linux/types.h>
   24.13 +
   24.14 +#define PF_KEY_V2		2
   24.15 +#define PFKEYV2_REVISION	199806L
   24.16 +
   24.17 +struct sadb_msg {
   24.18 +	uint8_t		sadb_msg_version;
   24.19 +	uint8_t		sadb_msg_type;
   24.20 +	uint8_t		sadb_msg_errno;
   24.21 +	uint8_t		sadb_msg_satype;
   24.22 +	uint16_t	sadb_msg_len;
   24.23 +	uint16_t	sadb_msg_reserved;
   24.24 +	uint32_t	sadb_msg_seq;
   24.25 +	uint32_t	sadb_msg_pid;
   24.26 +} __attribute__((packed));
   24.27 +/* sizeof(struct sadb_msg) == 16 */
   24.28 +
   24.29 +struct sadb_ext {
   24.30 +	uint16_t	sadb_ext_len;
   24.31 +	uint16_t	sadb_ext_type;
   24.32 +} __attribute__((packed));
   24.33 +/* sizeof(struct sadb_ext) == 4 */
   24.34 +
   24.35 +struct sadb_sa {
   24.36 +	uint16_t	sadb_sa_len;
   24.37 +	uint16_t	sadb_sa_exttype;
   24.38 +	uint32_t	sadb_sa_spi;
   24.39 +	uint8_t		sadb_sa_replay;
   24.40 +	uint8_t		sadb_sa_state;
   24.41 +	uint8_t		sadb_sa_auth;
   24.42 +	uint8_t		sadb_sa_encrypt;
   24.43 +	uint32_t	sadb_sa_flags;
   24.44 +} __attribute__((packed));
   24.45 +/* sizeof(struct sadb_sa) == 16 */
   24.46 +
   24.47 +struct sadb_lifetime {
   24.48 +	uint16_t	sadb_lifetime_len;
   24.49 +	uint16_t	sadb_lifetime_exttype;
   24.50 +	uint32_t	sadb_lifetime_allocations;
   24.51 +	uint64_t	sadb_lifetime_bytes;
   24.52 +	uint64_t	sadb_lifetime_addtime;
   24.53 +	uint64_t	sadb_lifetime_usetime;
   24.54 +} __attribute__((packed));
   24.55 +/* sizeof(struct sadb_lifetime) == 32 */
   24.56 +
   24.57 +struct sadb_address {
   24.58 +	uint16_t	sadb_address_len;
   24.59 +	uint16_t	sadb_address_exttype;
   24.60 +	uint8_t		sadb_address_proto;
   24.61 +	uint8_t		sadb_address_prefixlen;
   24.62 +	uint16_t	sadb_address_reserved;
   24.63 +} __attribute__((packed));
   24.64 +/* sizeof(struct sadb_address) == 8 */
   24.65 +
   24.66 +struct sadb_key {
   24.67 +	uint16_t	sadb_key_len;
   24.68 +	uint16_t	sadb_key_exttype;
   24.69 +	uint16_t	sadb_key_bits;
   24.70 +	uint16_t	sadb_key_reserved;
   24.71 +} __attribute__((packed));
   24.72 +/* sizeof(struct sadb_key) == 8 */
   24.73 +
   24.74 +struct sadb_ident {
   24.75 +	uint16_t	sadb_ident_len;
   24.76 +	uint16_t	sadb_ident_exttype;
   24.77 +	uint16_t	sadb_ident_type;
   24.78 +	uint16_t	sadb_ident_reserved;
   24.79 +	uint64_t	sadb_ident_id;
   24.80 +} __attribute__((packed));
   24.81 +/* sizeof(struct sadb_ident) == 16 */
   24.82 +
   24.83 +struct sadb_sens {
   24.84 +	uint16_t	sadb_sens_len;
   24.85 +	uint16_t	sadb_sens_exttype;
   24.86 +	uint32_t	sadb_sens_dpd;
   24.87 +	uint8_t		sadb_sens_sens_level;
   24.88 +	uint8_t		sadb_sens_sens_len;
   24.89 +	uint8_t		sadb_sens_integ_level;
   24.90 +	uint8_t		sadb_sens_integ_len;
   24.91 +	uint32_t	sadb_sens_reserved;
   24.92 +} __attribute__((packed));
   24.93 +/* sizeof(struct sadb_sens) == 16 */
   24.94 +
   24.95 +/* followed by:
   24.96 +	uint64_t	sadb_sens_bitmap[sens_len];
   24.97 +	uint64_t	sadb_integ_bitmap[integ_len];  */
   24.98 +
   24.99 +struct sadb_prop {
  24.100 +	uint16_t	sadb_prop_len;
  24.101 +	uint16_t	sadb_prop_exttype;
  24.102 +	uint8_t		sadb_prop_replay;
  24.103 +	uint8_t		sadb_prop_reserved[3];
  24.104 +} __attribute__((packed));
  24.105 +/* sizeof(struct sadb_prop) == 8 */
  24.106 +
  24.107 +/* followed by:
  24.108 +	struct sadb_comb sadb_combs[(sadb_prop_len +
  24.109 +		sizeof(uint64_t) - sizeof(struct sadb_prop)) /
  24.110 +		sizeof(struct sadb_comb)]; */
  24.111 +
  24.112 +struct sadb_comb {
  24.113 +	uint8_t		sadb_comb_auth;
  24.114 +	uint8_t		sadb_comb_encrypt;
  24.115 +	uint16_t	sadb_comb_flags;
  24.116 +	uint16_t	sadb_comb_auth_minbits;
  24.117 +	uint16_t	sadb_comb_auth_maxbits;
  24.118 +	uint16_t	sadb_comb_encrypt_minbits;
  24.119 +	uint16_t	sadb_comb_encrypt_maxbits;
  24.120 +	uint32_t	sadb_comb_reserved;
  24.121 +	uint32_t	sadb_comb_soft_allocations;
  24.122 +	uint32_t	sadb_comb_hard_allocations;
  24.123 +	uint64_t	sadb_comb_soft_bytes;
  24.124 +	uint64_t	sadb_comb_hard_bytes;
  24.125 +	uint64_t	sadb_comb_soft_addtime;
  24.126 +	uint64_t	sadb_comb_hard_addtime;
  24.127 +	uint64_t	sadb_comb_soft_usetime;
  24.128 +	uint64_t	sadb_comb_hard_usetime;
  24.129 +} __attribute__((packed));
  24.130 +/* sizeof(struct sadb_comb) == 72 */
  24.131 +
  24.132 +struct sadb_supported {
  24.133 +	uint16_t	sadb_supported_len;
  24.134 +	uint16_t	sadb_supported_exttype;
  24.135 +	uint32_t	sadb_supported_reserved;
  24.136 +} __attribute__((packed));
  24.137 +/* sizeof(struct sadb_supported) == 8 */
  24.138 +
  24.139 +/* followed by:
  24.140 +	struct sadb_alg sadb_algs[(sadb_supported_len +
  24.141 +		sizeof(uint64_t) - sizeof(struct sadb_supported)) /
  24.142 +		sizeof(struct sadb_alg)]; */
  24.143 +
  24.144 +struct sadb_alg {
  24.145 +	uint8_t		sadb_alg_id;
  24.146 +	uint8_t		sadb_alg_ivlen;
  24.147 +	uint16_t	sadb_alg_minbits;
  24.148 +	uint16_t	sadb_alg_maxbits;
  24.149 +	uint16_t	sadb_alg_reserved;
  24.150 +} __attribute__((packed));
  24.151 +/* sizeof(struct sadb_alg) == 8 */
  24.152 +
  24.153 +struct sadb_spirange {
  24.154 +	uint16_t	sadb_spirange_len;
  24.155 +	uint16_t	sadb_spirange_exttype;
  24.156 +	uint32_t	sadb_spirange_min;
  24.157 +	uint32_t	sadb_spirange_max;
  24.158 +	uint32_t	sadb_spirange_reserved;
  24.159 +} __attribute__((packed));
  24.160 +/* sizeof(struct sadb_spirange) == 16 */
  24.161 +
  24.162 +struct sadb_x_kmprivate {
  24.163 +	uint16_t	sadb_x_kmprivate_len;
  24.164 +	uint16_t	sadb_x_kmprivate_exttype;
  24.165 +	u_int32_t	sadb_x_kmprivate_reserved;
  24.166 +} __attribute__((packed));
  24.167 +/* sizeof(struct sadb_x_kmprivate) == 8 */
  24.168 +
  24.169 +struct sadb_x_sa2 {
  24.170 +	uint16_t	sadb_x_sa2_len;
  24.171 +	uint16_t	sadb_x_sa2_exttype;
  24.172 +	uint8_t		sadb_x_sa2_mode;
  24.173 +	uint8_t		sadb_x_sa2_reserved1;
  24.174 +	uint16_t	sadb_x_sa2_reserved2;
  24.175 +	uint32_t	sadb_x_sa2_sequence;
  24.176 +	uint32_t	sadb_x_sa2_reqid;
  24.177 +} __attribute__((packed));
  24.178 +/* sizeof(struct sadb_x_sa2) == 16 */
  24.179 +
  24.180 +struct sadb_x_policy {
  24.181 +	uint16_t	sadb_x_policy_len;
  24.182 +	uint16_t	sadb_x_policy_exttype;
  24.183 +	uint16_t	sadb_x_policy_type;
  24.184 +	uint8_t		sadb_x_policy_dir;
  24.185 +	uint8_t		sadb_x_policy_reserved;
  24.186 +	uint32_t	sadb_x_policy_id;
  24.187 +	uint32_t	sadb_x_policy_reserved2;
  24.188 +} __attribute__((packed));
  24.189 +/* sizeof(struct sadb_x_policy) == 16 */
  24.190 +
  24.191 +struct sadb_x_ipsecrequest {
  24.192 +	uint16_t	sadb_x_ipsecrequest_len;
  24.193 +	uint16_t	sadb_x_ipsecrequest_proto;
  24.194 +	uint8_t		sadb_x_ipsecrequest_mode;
  24.195 +	uint8_t		sadb_x_ipsecrequest_level;
  24.196 +	uint16_t	sadb_x_ipsecrequest_reqid;
  24.197 +} __attribute__((packed));
  24.198 +/* sizeof(struct sadb_x_ipsecrequest) == 8 as declared here (comment originally said 16; confirm intended layout) */
  24.199 +
  24.200 +/* This defines the TYPE of Nat Traversal in use.  Currently only one
  24.201 + * type of NAT-T is supported, draft-ietf-ipsec-udp-encaps-06
  24.202 + */
  24.203 +struct sadb_x_nat_t_type {
  24.204 +	uint16_t	sadb_x_nat_t_type_len;
  24.205 +	uint16_t	sadb_x_nat_t_type_exttype;
  24.206 +	uint8_t		sadb_x_nat_t_type_type;
  24.207 +	uint8_t		sadb_x_nat_t_type_reserved[3];
  24.208 +} __attribute__((packed));
  24.209 +/* sizeof(struct sadb_x_nat_t_type) == 8 */
  24.210 +
  24.211 +/* Pass a NAT Traversal port (Source or Dest port) */
  24.212 +struct sadb_x_nat_t_port {
  24.213 +	uint16_t	sadb_x_nat_t_port_len;
  24.214 +	uint16_t	sadb_x_nat_t_port_exttype;
  24.215 +	uint16_t	sadb_x_nat_t_port_port;
  24.216 +	uint16_t	sadb_x_nat_t_port_reserved;
  24.217 +} __attribute__((packed));
  24.218 +/* sizeof(struct sadb_x_nat_t_port) == 8 */
  24.219 +
  24.220 +/* Message types */
  24.221 +#define SADB_RESERVED		0
  24.222 +#define SADB_GETSPI		1
  24.223 +#define SADB_UPDATE		2
  24.224 +#define SADB_ADD		3
  24.225 +#define SADB_DELETE		4
  24.226 +#define SADB_GET		5
  24.227 +#define SADB_ACQUIRE		6
  24.228 +#define SADB_REGISTER		7
  24.229 +#define SADB_EXPIRE		8
  24.230 +#define SADB_FLUSH		9
  24.231 +#define SADB_DUMP		10
  24.232 +#define SADB_X_PROMISC		11
  24.233 +#define SADB_X_PCHANGE		12
  24.234 +#define SADB_X_SPDUPDATE	13
  24.235 +#define SADB_X_SPDADD		14
  24.236 +#define SADB_X_SPDDELETE	15
  24.237 +#define SADB_X_SPDGET		16
  24.238 +#define SADB_X_SPDACQUIRE	17
  24.239 +#define SADB_X_SPDDUMP		18
  24.240 +#define SADB_X_SPDFLUSH		19
  24.241 +#define SADB_X_SPDSETIDX	20
  24.242 +#define SADB_X_SPDEXPIRE	21
  24.243 +#define SADB_X_SPDDELETE2	22
  24.244 +#define SADB_X_NAT_T_NEW_MAPPING	23
  24.245 +#define SADB_MAX		23
  24.246 +
  24.247 +/* Security Association flags */
  24.248 +#define SADB_SAFLAGS_PFS	1
  24.249 +
  24.250 +/* Security Association states */
  24.251 +#define SADB_SASTATE_LARVAL	0
  24.252 +#define SADB_SASTATE_MATURE	1
  24.253 +#define SADB_SASTATE_DYING	2
  24.254 +#define SADB_SASTATE_DEAD	3
  24.255 +#define SADB_SASTATE_MAX	3
  24.256 +
  24.257 +/* Security Association types */
  24.258 +#define SADB_SATYPE_UNSPEC	0
  24.259 +#define SADB_SATYPE_AH		2
  24.260 +#define SADB_SATYPE_ESP		3
  24.261 +#define SADB_SATYPE_RSVP	5
  24.262 +#define SADB_SATYPE_OSPFV2	6
  24.263 +#define SADB_SATYPE_RIPV2	7
  24.264 +#define SADB_SATYPE_MIP		8
  24.265 +#define SADB_X_SATYPE_IPCOMP	9
  24.266 +#define SADB_SATYPE_MAX		9
  24.267 +
  24.268 +/* Authentication algorithms */
  24.269 +#define SADB_AALG_NONE			0
  24.270 +#define SADB_AALG_MD5HMAC		2
  24.271 +#define SADB_AALG_SHA1HMAC		3
  24.272 +#define SADB_X_AALG_SHA2_256HMAC	5
  24.273 +#define SADB_X_AALG_SHA2_384HMAC	6
  24.274 +#define SADB_X_AALG_SHA2_512HMAC	7
  24.275 +#define SADB_X_AALG_RIPEMD160HMAC	8
  24.276 +#define SADB_X_AALG_NULL		251	/* kame */
  24.277 +#define SADB_AALG_MAX			251
  24.278 +
  24.279 +/* Encryption algorithms */
  24.280 +#define SADB_EALG_NONE			0
  24.281 +#define SADB_EALG_DESCBC		2
  24.282 +#define SADB_EALG_3DESCBC		3
  24.283 +#define SADB_X_EALG_CASTCBC		6
  24.284 +#define SADB_X_EALG_BLOWFISHCBC		7
  24.285 +#define SADB_EALG_NULL			11
  24.286 +#define SADB_X_EALG_AESCBC		12
  24.287 +#define SADB_EALG_MAX			12
  24.288 +
  24.289 +/* Compression algorithms */
  24.290 +#define SADB_X_CALG_NONE		0
  24.291 +#define SADB_X_CALG_OUI			1
  24.292 +#define SADB_X_CALG_DEFLATE		2
  24.293 +#define SADB_X_CALG_LZS			3
  24.294 +#define SADB_X_CALG_LZJH		4
  24.295 +#define SADB_X_CALG_MAX			4
  24.296 +
  24.297 +/* Extension Header values */
  24.298 +#define SADB_EXT_RESERVED		0
  24.299 +#define SADB_EXT_SA			1
  24.300 +#define SADB_EXT_LIFETIME_CURRENT	2
  24.301 +#define SADB_EXT_LIFETIME_HARD		3
  24.302 +#define SADB_EXT_LIFETIME_SOFT		4
  24.303 +#define SADB_EXT_ADDRESS_SRC		5
  24.304 +#define SADB_EXT_ADDRESS_DST		6
  24.305 +#define SADB_EXT_ADDRESS_PROXY		7
  24.306 +#define SADB_EXT_KEY_AUTH		8
  24.307 +#define SADB_EXT_KEY_ENCRYPT		9
  24.308 +#define SADB_EXT_IDENTITY_SRC		10
  24.309 +#define SADB_EXT_IDENTITY_DST		11
  24.310 +#define SADB_EXT_SENSITIVITY		12
  24.311 +#define SADB_EXT_PROPOSAL		13
  24.312 +#define SADB_EXT_SUPPORTED_AUTH		14
  24.313 +#define SADB_EXT_SUPPORTED_ENCRYPT	15
  24.314 +#define SADB_EXT_SPIRANGE		16
  24.315 +#define SADB_X_EXT_KMPRIVATE		17
  24.316 +#define SADB_X_EXT_POLICY		18
  24.317 +#define SADB_X_EXT_SA2			19
  24.318 +/* The next four entries are for setting up NAT Traversal */
  24.319 +#define SADB_X_EXT_NAT_T_TYPE		20
  24.320 +#define SADB_X_EXT_NAT_T_SPORT		21
  24.321 +#define SADB_X_EXT_NAT_T_DPORT		22
  24.322 +#define SADB_X_EXT_NAT_T_OA		23
  24.323 +#define SADB_EXT_MAX			23
  24.324 +
  24.325 +/* Identity Extension values */
  24.326 +#define SADB_IDENTTYPE_RESERVED	0
  24.327 +#define SADB_IDENTTYPE_PREFIX	1
  24.328 +#define SADB_IDENTTYPE_FQDN	2
  24.329 +#define SADB_IDENTTYPE_USERFQDN	3
  24.330 +#define SADB_IDENTTYPE_MAX	3
  24.331 +
  24.332 +#endif /* !(_LINUX_PFKEY2_H) */
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/tools/vnet/vnet-module/random.c	Mon Nov 22 16:49:15 2004 +0000
    25.3 @@ -0,0 +1,101 @@
    25.4 +/*
    25.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    25.6 + *
    25.7 + * This program is free software; you can redistribute it and/or modify
    25.8 + * it under the terms of the GNU General Public License as published by the 
    25.9 + * Free Software Foundation; either version 2 of the License, or (at your
   25.10 + * option) any later version.
   25.11 + * 
   25.12 + * This program is distributed in the hope that it will be useful, but
   25.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   25.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   25.15 + * for more details.
   25.16 + *
   25.17 + * You should have received a copy of the GNU General Public License along
   25.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   25.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   25.20 + *
   25.21 + */
   25.22 +#include <linux/config.h>
   25.23 +#include <linux/module.h>
   25.24 +#include <linux/init.h>
   25.25 +#include <linux/sched.h>
   25.26 +#include <linux/random.h>
   25.27 +
   25.28 +#include "hash_table.h"
   25.29 +
   25.30 +#define MODULE_NAME "RANDOM"
   25.31 +#define DEBUG 1
   25.32 +#undef DEBUG
   25.33 +#include "debug.h"
   25.34 +
   25.35 +/** @file
   25.36 + * Source of randomness.
   25.37 + * The current implementation is not strong enough.
   25.38 + * Needs to be cryptographically strong.
   25.39 + */
   25.40 +
   25.41 +static unsigned long seed = 0;
   25.42 +static unsigned long count = 0;
   25.43 +/** Run pseudo_des() over *a and b; returns b as pseudo_des() left it. */
   25.44 +static unsigned long stir(unsigned long *a, unsigned long b){
   25.45 +    pseudo_des(a, &b);
   25.46 +    return b;
   25.47 +}    
   25.48 +
   25.49 +/** Get one random byte.
   25.50 + * NOTE(review): the stirred value is returned as an int without masking to 8 bits - confirm callers.
   25.51 + * @return random byte
   25.52 + */
   25.53 +int get_random_byte(void){
   25.54 +    return stir(&seed, ++count);
   25.55 +}
   25.56 +
   25.57 +#if 0
   25.58 +/* Get some random bytes.
   25.59 + * Compiled out by the surrounding #if 0.
   25.60 + * @param dst destination for the bytes
   25.61 + * @param dst_n number of bytes to get
   25.62 + */
   25.63 +void get_random_bytes(void *dst, int dst_n){
   25.64 +    int i;
   25.65 +    char *p = (char *)dst;
   25.66 +    for(i = 0; i < dst_n; i++){
   25.67 +        *p++ = get_random_byte();
   25.68 +    }
   25.69 +}
   25.70 +#endif
   25.71 +
   25.72 +/** Contribute a random byte.
   25.73 + * Stirs the seed with the incremented call counter, then with b.
   25.74 + * @param b byte to contribute
   25.75 + */
   25.76 +void add_random_byte(int b){
   25.77 +    stir(&seed, ++count);
   25.78 +    stir(&seed, b);
   25.79 +}
   25.80 +
   25.81 +/** Contribute some random bytes.
   25.82 + * Each byte of src is passed to add_random_byte() in order; src is only read.
   25.83 + * @param src bytes to contribute
   25.84 + * @param src_n number of bytes (nothing is done if it is not positive)
   25.85 + */
   25.86 +void add_random_bytes(const void *src, int src_n){
   25.87 +    int i;
   25.88 +    const char *p = src;   /* keep the const qualifier: the bytes are never written */
   25.89 +    for(i = 0; i < src_n; i++){
   25.90 +        add_random_byte(p[i]);
   25.91 +    }
   25.92 +}
   25.93 +/** Module init: seeds from the address of a local variable, then adds the jiffies value. */
   25.94 +int __init random_module_init(void){
   25.95 +    int dummy;
   25.96 +    int tmp = jiffies;
   25.97 +    seed = (unsigned long)&dummy;
   25.98 +    add_random_byte(tmp);
   25.99 +    return 0;
  25.100 +}
  25.101 +/** Module exit: nothing is done here. */
  25.102 +void __exit random_module_exit(void){
  25.103 +}
  25.104 +
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/tools/vnet/vnet-module/random.h	Mon Nov 22 16:49:15 2004 +0000
    26.3 @@ -0,0 +1,30 @@
    26.4 +/*
    26.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    26.6 + *
    26.7 + * This program is free software; you can redistribute it and/or modify
    26.8 + * it under the terms of the GNU General Public License as published by the 
    26.9 + * Free Software Foundation; either version 2 of the License, or (at your
   26.10 + * option) any later version.
   26.11 + * 
   26.12 + * This program is distributed in the hope that it will be useful, but
   26.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   26.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   26.15 + * for more details.
   26.16 + *
   26.17 + * You should have received a copy of the GNU General Public License along
   26.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   26.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   26.20 + *
   26.21 + */
   26.22 +#ifndef __VNET_RANDOM_H__
   26.23 +#define __VNET_RANDOM_H__
   26.24 +
   26.25 +extern int get_random_byte(void);
   26.26 +extern void get_random_bytes(void *dst, int dst_n);
   26.27 +extern void add_random_byte(int b);
   26.28 +extern void add_random_bytes(const void *src, int src_n);
   26.29 +
   26.30 +extern int random_module_init(void);
   26.31 +extern void random_module_exit(void);
   26.32 +
   26.33 +#endif /* ! __VNET_RANDOM_H__ */
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/tools/vnet/vnet-module/sa.c	Mon Nov 22 16:49:15 2004 +0000
    27.3 @@ -0,0 +1,670 @@
    27.4 +/*
    27.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    27.6 + *
    27.7 + * This program is free software; you can redistribute it and/or modify
    27.8 + * it under the terms of the GNU General Public License as published by the 
    27.9 + * Free Software Foundation; either version 2 of the License, or (at your
   27.10 + * option) any later version.
   27.11 + * 
   27.12 + * This program is distributed in the hope that it will be useful, but
   27.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   27.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   27.15 + * for more details.
   27.16 + *
   27.17 + * You should have received a copy of the GNU General Public License along
   27.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   27.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   27.20 + *
   27.21 + */
   27.22 +#include <linux/config.h>
   27.23 +#include <linux/kernel.h>
   27.24 +
   27.25 +#include <net/ip.h>
   27.26 +#include <net/protocol.h>
   27.27 +#include <net/route.h>
   27.28 +#include <linux/skbuff.h>
   27.29 +
   27.30 +#include <linux/in.h>
   27.31 +#include <linux/inet.h>
   27.32 +#include <linux/netdevice.h>
   27.33 +
   27.34 +#include <sa.h>
   27.35 +#include <sa_algorithm.h>
   27.36 +#include "hash_table.h"
   27.37 +#include "allocate.h"
   27.38 +
   27.39 +#define MODULE_NAME "IPSEC"
   27.40 +#define DEBUG 1
   27.41 +#undef DEBUG
   27.42 +#include "debug.h"
   27.43 +
   27.44 +/** @file IPSEC Security Association (SA).
   27.45 + */
   27.46 +
   27.47 +/** Maximum number of protocols.*/
   27.48 +#define INET_PROTOCOL_MAX 256
   27.49 +
   27.50 +/** Table of SA types indexed by protocol. */
   27.51 +static SAType *sa_type[INET_PROTOCOL_MAX] = {};
   27.52 +
   27.53 +/** Map a protocol number onto an index into the SA type table.
   27.54 + *
   27.55 + * @param protocol protocol number
   27.56 + * @return protocol masked with (INET_PROTOCOL_MAX - 1)
   27.57 + */
   27.58 +static inline unsigned char InetProtocol_hash(int protocol){
   27.59 +    return (unsigned char)(protocol & (INET_PROTOCOL_MAX - 1));
   27.60 +}
   27.61 +
   27.62 +/** Register an SA type in the per-protocol table.
   27.63 + * Fails if type is NULL or the protocol's slot is already occupied.
   27.64 + *
   27.65 + * @param type SA type
   27.66 + * @return 0 on success, -EINVAL otherwise
   27.67 + */
   27.68 +int SAType_add(SAType *type){
   27.69 +    int slot;
   27.70 +
   27.71 +    if(!type) return -EINVAL;
   27.72 +    slot = InetProtocol_hash(type->protocol);
   27.73 +    /* Never replace an existing registration. */
   27.74 +    if(sa_type[slot]) return -EINVAL;
   27.75 +
   27.76 +    sa_type[slot] = type;
   27.77 +    return 0;
   27.78 +}
   27.79 +
   27.80 +/** Deregister an SA type from the per-protocol table.
   27.81 + * Fails if type is NULL or nothing is registered in the protocol's slot.
   27.82 + *
   27.83 + * @param type SA type
   27.84 + * @return 0 on success, -EINVAL otherwise
   27.85 + */
   27.86 +int SAType_del(SAType *type){
   27.87 +    int slot;
   27.88 +
   27.89 +    if(!type) return -EINVAL;
   27.90 +    slot = InetProtocol_hash(type->protocol);
   27.91 +    /* The slot is cleared whatever it holds; the entry is not compared with type. */
   27.92 +    if(!sa_type[slot]) return -EINVAL;
   27.93 +
   27.94 +    sa_type[slot] = NULL;
   27.95 +    return 0;
   27.96 +}
   27.97 +/** Look up the SA type registered for a protocol.
   27.98 + *
   27.99 + * @param protocol protocol number
  27.100 + * @param type return parameter: the table entry, NULL when none is registered
  27.101 + * @return 0 if an SA type was found, -ENOENT otherwise
  27.102 + */
  27.103 +int SAType_get(int protocol, SAType **type){
  27.104 +    SAType *found = sa_type[InetProtocol_hash(protocol)];
  27.105 +    *type = found;
  27.106 +    return (found ? 0 : -ENOENT);
  27.107 +}
  27.108 +
  27.109 +/* Defeat compiler warnings about unused functions. */
  27.110 +static int sa_key_check(SAKey *key, enum sa_alg_type type) __attribute__((unused));
  27.111 +static u32 random_spi(void) __attribute__((unused));
  27.112 +static u32 generate_key(u32 key, u32 offset, u32 spi) __attribute__((unused));
  27.113 +
  27.114 +/** Check a key has an acceptable length for an algorithm.
  27.115 + * Currently a stub: no check is made and 0 is always returned.
  27.116 + * @param key key
  27.117 + * @param type algorithm
  27.118 + * @return 0 on success, error code otherwise
  27.119 + */
  27.120 +static int sa_key_check(SAKey *key, enum sa_alg_type type){
  27.121 +    return 0;
  27.122 +}
  27.123 +
  27.124 +static unsigned long sa_spi_counter = 0;
  27.125 +
  27.126 +/** Generate a random spi.
  27.127 + * Hashes a running counter with pseudo_des() until the result is non-zero.
  27.128 + * @return spi (never zero)
  27.129 + */
  27.130 +static u32 random_spi(void){
  27.131 +    unsigned long counter;
  27.132 +    unsigned long hashed = 0;
  27.133 +    u32 spi = 0;
  27.134 +    while(!spi){
  27.135 +        counter = sa_spi_counter++;
  27.136 +        pseudo_des(&counter, &hashed);
  27.137 +        spi = hashed;
  27.138 +    }
  27.139 +    return spi;
  27.140 +}
  27.141 +
  27.142 +/** Mangle some input to generate output.
  27.143 + * Each input word is XORed into the left half, then pseudo_des() is run.
  27.144 + * This is used to derive spis and keying material from secrets,
  27.145 + * so it probably ought to use a good hash (sha1) or cipher (aes).
  27.146 + *
  27.147 + * @param input input values
  27.148 + * @param n number of values
  27.149 + * @return mangled value (the right half, cast to u32)
  27.150 + */
  27.151 +static u32 mangle(u32 input[], int n){
  27.152 +    unsigned long lhs = 0, rhs = 0;
  27.153 +    int k = 0;
  27.154 +    while(k < n){
  27.155 +        lhs ^= input[k++];
  27.156 +        pseudo_des(&lhs, &rhs);
  27.157 +    }
  27.158 +    return (u32)rhs;
  27.159 +}
  27.160 +
  27.161 +/** Derive a spi from a secret key, an offset, a protocol and an address.
  27.162 + * The offset lets the caller generate more than one spi for the same
  27.163 + * protocol and address. The four values are combined with mangle().
  27.164 + *
  27.165 + * @param key key
  27.166 + * @param offset offset
  27.167 + * @param protocol protocol
  27.168 + * @param addr IP address
  27.169 + * @return spi
  27.170 + */
  27.171 +static u32 generate_spi(u32 key, u32 offset, u32 protocol, u32 addr){
  27.172 +    u32 words[] = { key, offset, protocol, addr };
  27.173 +    u32 result;
  27.174 +    dprintf(">\n");
  27.175 +    result = mangle(words, sizeof(words) / sizeof(words[0]));
  27.176 +    dprintf("< spi=%x\n", result);
  27.177 +    return result;
  27.178 +}
  27.179 +
  27.180 +/** Derive keying material for a spi from a secret.
  27.181 + * The secret, offset and spi are combined with mangle().
  27.182 + *
  27.183 + * @param key secret
  27.184 + * @param offset offset
  27.185 + * @param spi spi
  27.186 + * @return keying material
  27.187 + */
  27.188 +static u32 generate_key(u32 key, u32 offset, u32 spi){
  27.189 +    u32 words[] = { key, offset, spi };
  27.190 +    return mangle(words, sizeof(words) / sizeof(words[0]));
  27.191 +}
  27.192 +
  27.193 +/** Allocate a spi.
  27.194 + * Want to use random ones.
  27.195 + * So check for ones not in use.
  27.196 + *
  27.197 + * When using static keying, both ends need to agree on key.
  27.198 + * How does that work? Also, will suddenly get traffic using a spi,
  27.199 + * and will have to create SA then. Or need to create in advance.
  27.200 + * But can't do that because don't know peers.
  27.201 + * When get message on a spi that doesn't exist - do what?
  27.202 + * Use a spi related to the destination addr and a secret.
  27.203 + * Then receiver can check if spi is ok and create SA on demand.
  27.204 + * Use hash of key, protocol, addr to generate. Then have to check
  27.205 + * for in-use because of potential collisions. Receiver can do the
  27.206 + * same hash and check spi is in usable range. Then derive keys from
  27.207 + * the spi (using another secret).
  27.208 + *
  27.209 + * @param key spi generation key
  27.210 + * @param protocol protocol
  27.211 + * @param addr IP address
  27.212 + * @param spip return parameter for spi
  27.213 + * @return 0 on success, error code otherwise
  27.214 + */
  27.215 +int sa_spi_alloc(u32 key, u32 protocol, u32 addr, u32 *spip){
  27.216 +    int i, n = 100; // n bounds the number of candidate spis tried
  27.217 +    u32 spi;
  27.218 +    SAState *used;
  27.219 +    for(i = 0; i < n; i++){
  27.220 +        spi = generate_spi(key, i, protocol, addr);
  27.221 +        if(!spi) continue; // zero means 'no spi', so never hand it out
  27.222 +        used = sa_table_lookup_spi(spi, protocol, addr);
  27.223 +        if(!used){
  27.224 +            *spip = spi;
  27.225 +            return 0;
  27.226 +        }
  27.227 +        SAState_decref(used); // the lookup took a reference; release it
  27.228 +    }
  27.229 +    return -ENOMEM; // every candidate was zero or already in use
  27.230 +}
  27.231 +
  27.232 +/** Table of SAs. Indexed by unique id and spi/protocol/addr triple.
  27.233 + */
  27.234 +static HashTable *sa_table = NULL;
  27.235 +
  27.236 +static u32 sa_id = 1;
  27.237 +
  27.238 +/** Hash an SA id.
  27.239 + *
  27.240 + * @param id SA id
  27.241 + * @return hashcode
  27.242 + */
  27.243 +static inline Hashcode sa_table_hash_id(u32 id){
  27.244 +    return hash_ul(id);
  27.245 +}
  27.246 +
  27.247 +/** Hash SA spi/protocol/addr.
  27.248 + *
  27.249 + * @param spi spi
  27.250 + * @param protocol protocol
  27.251 + * @param addr IP address
  27.252 + * @return hashcode
  27.253 + */
  27.254 +static inline Hashcode sa_table_hash_spi(u32 spi, u32 protocol, u32 addr){
  27.255 +    Hashcode h = 0;
  27.256 +    h = hash_2ul(spi, protocol);
  27.257 +    h = hash_hul(h, addr);
  27.258 +    return h;
  27.259 +}
  27.260 +
  27.261 +/** Test if an SA entry has a given value.
  27.262 + *
  27.263 + * @param arg contains SA pointer
  27.264 + * @param table hashtable
  27.265 + * @param entry entry containing SA
  27.266 + * @return 1 if it does, 0 otherwise
  27.267 + */
  27.268 +static int sa_table_state_fn(TableArg arg, HashTable *table, HTEntry *entry){
  27.269 +    return entry->value == arg.ptr;
  27.270 +}
  27.271 +
  27.272 +/** Test if an SA entry has a given id.
  27.273 + *
  27.274 + * @param arg contains SA id
  27.275 + * @param table hashtable
  27.276 + * @param entry entry containing SA
  27.277 + * @return 1 if it does, 0 otherwise
  27.278 + */
  27.279 +static int sa_table_id_fn(TableArg arg, HashTable *table, HTEntry *entry){
  27.280 +    SAState *state = entry->value;
  27.281 +    u32 id = arg.ul;
  27.282 +    return state->ident.id == id;
  27.283 +}
  27.284 +
  27.285 +/** Test if an SA entry has a given spi/protocol/addr.
  27.286 + *
  27.287 + * @param arg contains SAIdent pointer
  27.288 + * @param table hashtable
  27.289 + * @param entry entry containing SA
  27.290 + * @return 1 if it does, 0 otherwise
  27.291 + */
  27.292 +static int sa_table_spi_fn(TableArg arg, HashTable *table, HTEntry *entry){
  27.293 +    SAState *state = entry->value;
  27.294 +    SAIdent *ident = arg.ptr;
  27.295 +    return state->ident.spi      == ident->spi
  27.296 +        && state->ident.protocol == ident->protocol
  27.297 +        && state->ident.addr     == ident->addr;
  27.298 +}
  27.299 +
  27.300 +/** Free an SA entry. Decrements the SA refcount and frees the entry.
  27.301 + *
  27.302 + * @param table containing table
  27.303 + * @param entry to free
  27.304 + */
  27.305 +void sa_table_free_fn(HashTable *table, HTEntry *entry){
  27.306 +    if(!entry) return;
  27.307 +    if(entry->value){
  27.308 +        SAState *state = entry->value;
  27.309 +        SAState_decref(state);
  27.310 +    }
  27.311 +    deallocate(entry);
  27.312 +}
  27.313 +
  27.314 +/** Initialize the SA table.
  27.315 + *
  27.316 + * @return 0 on success, error code otherwise
  27.317 + */
  27.318 +int sa_table_init(void){
  27.319 +    int err = 0;
  27.320 +    sa_table = HashTable_new(0);
  27.321 +    if(!sa_table){
  27.322 +        err = -ENOMEM;
  27.323 +        goto exit;
  27.324 +    }
  27.325 +    sa_table->entry_free_fn = sa_table_free_fn;
  27.326 +
  27.327 +  exit:
  27.328 +    return err;
  27.329 +}
  27.330 +
  27.331 +void sa_table_exit(void){
  27.332 +    HashTable_free(sa_table);
  27.333 +}
  27.334 +
  27.335 +/** Remove an SA from the table.
  27.336 + *
  27.337 + * @param state SA
  27.338 + */
  27.339 +int sa_table_delete(SAState *state){
  27.340 +    int count = 0;
  27.341 +    Hashcode h1, h2;
  27.342 +    TableArg arg = { .ptr = state };
  27.343 +    // Remove by id.
  27.344 +    h1 = sa_table_hash_id(state->ident.id);
  27.345 +    count += HashTable_remove_entry(sa_table, h1, sa_table_state_fn, arg);
  27.346 +    // Remove by spi/protocol/addr if spi nonzero.
  27.347 +    if(!state->ident.spi) goto exit;
  27.348 +    h2 = sa_table_hash_spi(state->ident.spi, state->ident.protocol, state->ident.addr);
  27.349 +    if(h1 == h2) goto exit;
  27.350 +    count += HashTable_remove_entry(sa_table, h2, sa_table_state_fn, arg);
  27.351 +  exit:
  27.352 +    return count;
  27.353 +}
  27.354 +
  27.355 +/** Add an SA to the table.
  27.356 + * The SA is indexed by id and spi/protocol/addr (if the spi is non-zero).
  27.357 + *
  27.358 + * @param state SA
  27.359 + * @return 0 on success, error code otherwise
  27.360 + */
  27.361 +int sa_table_add(SAState *state){
  27.362 +    int err = 0;
  27.363 +    Hashcode h1, h2;
  27.364 +    int entries = 0;
  27.365 +
  27.366 +    dprintf(">\n");
  27.367 +    // Index by id.
  27.368 +    h1 = sa_table_hash_id(state->ident.id);
  27.369 +    if(!HashTable_add_entry(sa_table, h1, HKEY(state->ident.id), state)){
  27.370 +        err = -ENOMEM;
  27.371 +        goto exit;
  27.372 +    }
  27.373 +    entries++;
  27.374 +    SAState_incref(state);
  27.375 +    // Index by spi/protocol/addr if spi non-zero.
  27.376 +    if(state->ident.spi){
  27.377 +        h2 = sa_table_hash_spi(state->ident.spi, state->ident.protocol, state->ident.addr);
  27.378 +        if(h1 != h2){
  27.379 +            if(!HashTable_add_entry(sa_table, h2, HKEY(state->ident.id), state)){
  27.380 +                err = -ENOMEM;
  27.381 +                goto exit;
  27.382 +            }
  27.383 +            entries++;
  27.384 +            SAState_incref(state);
  27.385 +        }
  27.386 +    }
  27.387 +  exit:
  27.388 +    if(err && entries){
  27.389 +        sa_table_delete(state);
  27.390 +    }
  27.391 +    dprintf("< err=%d\n", err);
  27.392 +    return err;
  27.393 +}
  27.394 +
  27.395 +
  27.396 +/** Find an SA by spi/protocol/addr.
  27.397 + * Increments the SA refcount on success.
  27.398 + *
  27.399 + * @param spi spi
  27.400 + * @param protocol protocol
  27.401 + * @param addr IP address
  27.402 + * @return SA or NULL
  27.403 + */
  27.404 +SAState * sa_table_lookup_spi(u32 spi, u32 protocol, u32 addr){
  27.405 +    SAState *state = NULL;
  27.406 +    Hashcode h;
  27.407 +    SAIdent id = {
  27.408 +        .spi      = spi,
  27.409 +        .protocol = protocol,
  27.410 +        .addr     = addr };
  27.411 +    TableArg arg = { .ptr = &id };
  27.412 +    HTEntry *entry = NULL;
  27.413 +
  27.414 +    h = sa_table_hash_spi(spi, protocol, addr);
  27.415 +    entry = HashTable_find_entry(sa_table, h, sa_table_spi_fn, arg);
  27.416 +    if(entry){
  27.417 +        state = entry->value;
  27.418 +        SAState_incref(state);
  27.419 +    }
  27.420 +    return state;
  27.421 +}
  27.422 +
  27.423 +/** Find an SA by unique id.
  27.424 + * Increments the SA refcount on success.
  27.425 + *
  27.426 + * @param id id
  27.427 + * @return SA or NULL
  27.428 + */
  27.429 +SAState * sa_table_lookup_id(u32 id){
  27.430 +    Hashcode h;
  27.431 +    TableArg arg = { .ul = id };
  27.432 +    HTEntry *entry = NULL;
  27.433 +    SAState *state = NULL;
  27.434 +
  27.435 +    dprintf("> id=%u\n", id);
  27.436 +    h = sa_table_hash_id(id);
  27.437 +    entry = HashTable_find_entry(sa_table, h, sa_table_id_fn, arg);
  27.438 +    if(entry){
  27.439 +        state = entry->value;
  27.440 +        SAState_incref(state);
  27.441 +    }
  27.442 +    dprintf("< state=%p\n", state);
  27.443 +    return state;
  27.444 +}
  27.445 +
  27.446 +/** Replace an existing SA by another in the table.
  27.447 + * The existing SA is not removed if the new one cannot be added.
  27.448 + *
  27.449 + * @param existing SA to replace
  27.450 + * @param state new SA
  27.451 + * @return 0 on success, error code otherwise
  27.452 + */
  27.453 +static int sa_table_replace(SAState *existing, SAState *state){
  27.454 +    int err = 0;
  27.455 +    // Need check for in-use?
  27.456 +    
  27.457 +    dprintf(">\n");
  27.458 +    if(existing->keying.state != SA_STATE_ACQUIRE){
  27.459 +        err = -EINVAL;
  27.460 +        goto exit;
  27.461 +    }
  27.462 +    // replace it.
  27.463 +    err = sa_table_add(state);
  27.464 +    if(err) goto exit;
  27.465 +    sa_table_delete(existing);
  27.466 +  exit:
  27.467 +    dprintf("< err=%d\n", err);
  27.468 +    return err;
  27.469 +}
  27.470 +
  27.471 +/** Allocate an SA.
  27.472 + *
  27.473 + * @return SA or NULL
  27.474 + */
  27.475 +SAState *SAState_alloc(void){
  27.476 +    SAState *state;
  27.477 +    
  27.478 +    dprintf(">\n");
  27.479 +    state = kmalloc(sizeof(SAState), GFP_ATOMIC);
  27.480 +    if(!state) goto exit;
  27.481 +    *state = (SAState){};
  27.482 +    atomic_set(&state->refcount, 1);
  27.483 +    state->lock = SPIN_LOCK_UNLOCKED;
  27.484 +  exit:
  27.485 +    dprintf("< state=%p\n", state);
  27.486 +    return state;
  27.487 +}
  27.488 +
  27.489 +/** Create an SA in initial state.
  27.490 + * It has no spi and its keying state is acquire.
  27.491 + * It must have a unique id, protocol and address.
  27.492 + * At some point it should get updated with a complete SA.
  27.493 + *
  27.494 + * @param ident SA identifier (spi must be zero, id must be non-zero)
  27.495 + * @param statep return parameter for new SA (set to NULL on error)
  27.496 + * @return 0 on success, error code otherwise
  27.497 + */
  27.498 +int SAState_init(SAIdent *ident, SAState **statep){
  27.499 +    int err = 0;
  27.500 +    SAState *state = NULL;
  27.501 +    if(ident->spi || !ident->id){
  27.502 +        err = -EINVAL;
  27.503 +        goto exit;
  27.504 +    }
  27.505 +    state = SAState_alloc();
  27.506 +    if (!state){
  27.507 +        err = -ENOMEM;
  27.508 +        goto exit;
  27.509 +    }
  27.510 +    state->ident = *ident;
  27.511 +    state->keying.state = SA_STATE_ACQUIRE;
  27.512 +  exit:
  27.513 +    *statep = state; // hand back the SA; NULL on failure, as in SAState_create()
  27.514 +    return err;
  27.515 +}
  27.516 +
  27.517 +/** Create a complete SA, with spi and cipher suite.
  27.518 + *
  27.519 + * @param info SA parameters
  27.520 + * @param statep return parameter for new SA
  27.521 + * @return 0 on success, error code otherwise
  27.522 + */
  27.523 +int SAState_create(SAInfo *info, SAState **statep){
  27.524 +    int err = 0;
  27.525 +    SAState *state = NULL;
  27.526 +
  27.527 +    dprintf(">\n");
  27.528 +    state = SAState_alloc();
  27.529 +    if (!state){
  27.530 +        err = -ENOMEM;
  27.531 +        goto exit;
  27.532 +    }
  27.533 +    state->ident = info->ident;
  27.534 +    state->limits = info->limits;
  27.535 +    state->digest = info->digest;
  27.536 +    state->cipher = info->cipher;
  27.537 +    state->compress = info->compress;
  27.538 +    state->security = info->security;
  27.539 +    err = SAType_get(state->ident.protocol, &state->type);
  27.540 +    if (err) goto exit;
  27.541 +    err = state->type->init(state, NULL);
  27.542 +    if (err) goto exit;
  27.543 +    state->keying.state = SA_STATE_VALID;
  27.544 +  exit:
  27.545 +    if(err){
  27.546 +        SAState_decref(state);
  27.547 +        state = NULL;
  27.548 +    }
  27.549 +    *statep = state;
  27.550 +    dprintf("< err=%d\n", err);
  27.551 +    return err;
  27.552 +}
  27.553 +
  27.554 +/** Create an SA for the given spi etc.
  27.555 + * For now we fix the cipher suite and the keys.
  27.556 + * Digest is SHA1 HMAC with a 128-bit key.
  27.557 + * Cipher is AES (Rijndael) in CBC mode with a 128-bit key.
  27.558 + *
  27.559 + * The cipher suite and keys should really come from policy, with the
  27.560 + * possibility of negotiating them with the peer (using IKE).
  27.561 + * Negotiation creates difficulties though - because the SA cannot
  27.562 + * be created immediately we have to be able to queue packets
  27.563 + * while the SA is being negotiated.
  27.564 + *
  27.565 + * @param spi spi
  27.566 + * @param protocol protocol
  27.567 + * @param addr address
  27.568 + * @param sa return parameter for SA
  27.569 + * @return 0 on success, error code otherwise
  27.570 + */
  27.571 +int sa_create(int security, u32 spi, u32 protocol, u32 addr, SAState **sa){
  27.572 +    int err = 0;
  27.573 +    SAInfo info = {};
  27.574 +    char *digest_name = "sha1";
  27.575 +    char *digest_key = "0123456789abcdef";
  27.576 +    int digest_key_n = strlen(digest_key);
  27.577 +    char *cipher_name= "aes";
  27.578 +    char *cipher_key = "0123456789ABCDEF";
  27.579 +    int cipher_key_n = strlen(cipher_key);
  27.580 +
  27.581 +    dprintf("> security=%d spi=%u protocol=%u addr=" IPFMT "\n",
  27.582 +            security, spi, protocol, NIPQUAD(addr));
  27.583 +    if(!spi){
  27.584 +        spi = generate_spi(0, 0, protocol, addr);
  27.585 +    }
  27.586 +    dprintf("> info...\n");
  27.587 +    info.ident.id = sa_id++;
  27.588 +    info.ident.spi = spi;
  27.589 +    info.ident.protocol = protocol;
  27.590 +    info.ident.addr = addr;
  27.591 +    info.security = security;
  27.592 +
  27.593 +    //sa_algorithm_probe_all();
  27.594 +
  27.595 +    dprintf("> digest name=%s key_n=%d\n", digest_name, digest_key_n);
  27.596 +    strcpy(info.digest.name, digest_name);
  27.597 +    info.digest.bits = digest_key_n * 8;
  27.598 +    memcpy(info.digest.key, digest_key, digest_key_n);
  27.599 +
  27.600 +    if(security & SA_CONF){
  27.601 +        dprintf("> cipher name=%s key_n=%d\n", cipher_name, cipher_key_n);
  27.602 +        strcpy(info.cipher.name, cipher_name);
  27.603 +        info.cipher.bits = cipher_key_n * 8;
  27.604 +        memcpy(info.cipher.key, cipher_key, cipher_key_n);
  27.605 +    } else {
  27.606 +        dprintf("> cipher name=%s key_n=%d\n", "cipher_null", 0);
  27.607 +        strcpy(info.cipher.name, "cipher_null");
  27.608 +        info.cipher.bits = 0;
  27.609 +        memset(info.cipher.key, 0, sizeof(info.cipher.key));
  27.610 +    }
  27.611 +
  27.612 +    err = sa_set(&info, 0, sa);
  27.613 +    dprintf("< err=%d\n", err);
  27.614 +    return err;
  27.615 +}
  27.616 +
  27.617 +/** Create or update an SA.
  27.618 + * The SA is added to the table.
  27.619 + *
  27.620 + * @param info SA parameters
  27.621 + * @param update create if zero, update otherwise
  27.622 + * @return 0 on success, error code otherwise
  27.623 + */
  27.624 +int sa_set(SAInfo *info, int update, SAState **val){
  27.625 +    int err = 0;
  27.626 +    SAState *state = NULL;
  27.627 +    SAState *existing = NULL;
  27.628 +
  27.629 +    dprintf("> info=%p update=%d val=%p\n", info, update, val);
  27.630 +    existing = sa_table_lookup_id(info->ident.id);
  27.631 +    if(update && !existing){
  27.632 +        err = -ENOENT;
  27.633 +    } else if(!update && existing){
  27.634 +        err = -EINVAL;
  27.635 +    }
  27.636 +    if(err) goto exit;
  27.637 +    err = SAState_create(info, &state);
  27.638 +    if (err) goto exit;
  27.639 +    if(existing){
  27.640 +        err = sa_table_replace(existing, state);
  27.641 +    } else {
  27.642 +        err = sa_table_add(state);
  27.643 +    }
  27.644 +  exit:
  27.645 +    if(existing) SAState_decref(existing);
  27.646 +    if(val && !err){
  27.647 +        *val = state;
  27.648 +    } else {
  27.649 +        SAState_decref(state);
  27.650 +    }
  27.651 +    dprintf("< err=%d\n", err);
  27.652 +    return err;
  27.653 +}
  27.654 +
  27.655 +/** Delete an SA. Removes it from the SA table.
  27.656 + * It is an error if no SA with the given id exists.
  27.657 + *
  27.658 + * @param id SA id
  27.659 + * @return 0 on success, error code otherwise
  27.660 + */
  27.661 +int sa_delete(int id){
  27.662 +    int err = 0;
  27.663 +    SAState *state;
  27.664 +    state = sa_table_lookup_id(id);
  27.665 +    if (!state){
  27.666 +        err = -ENOENT;
  27.667 +        goto exit;
  27.668 +    }
  27.669 +    sa_table_delete(state);
  27.670 +    SAState_decref(state);
  27.671 +  exit:
  27.672 +    return err;
  27.673 +}
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/tools/vnet/vnet-module/sa.h	Mon Nov 22 16:49:15 2004 +0000
    28.3 @@ -0,0 +1,199 @@
    28.4 +/*
    28.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    28.6 + *
    28.7 + * This program is free software; you can redistribute it and/or modify
    28.8 + * it under the terms of the GNU General Public License as published by the 
    28.9 + * Free Software Foundation; either version 2 of the License, or (at your
   28.10 + * option) any later version.
   28.11 + * 
   28.12 + * This program is distributed in the hope that it will be useful, but
   28.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   28.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   28.15 + * for more details.
   28.16 + *
   28.17 + * You should have received a copy of the GNU General Public License along
   28.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   28.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   28.20 + *
   28.21 + */
   28.22 +#ifndef __VNET_SA_H__
   28.23 +#define __VNET_SA_H__
   28.24 +
   28.25 +#include <linux/types.h>
   28.26 +#include <linux/crypto.h>
   28.27 +
   28.28 +#include <tunnel.h>
   28.29 +
   28.30 +#ifndef CRYPTO_MAX_KEY_BYTES
   28.31 +#define CRYPTO_MAX_KEY_BYTES            64
   28.32 +#define CRYPTO_MAX_KEY_BITS             (CRYPTO_MAX_KEY_BYTES * 8)
   28.33 +#endif
   28.34 +
   28.35 +typedef struct SALimits {
   28.36 +    u64 bytes_soft;
   28.37 +    u64 bytes_hard;
   28.38 +    u64 packets_soft;
   28.39 +    u64 packets_hard;
   28.40 +} SALimits;
   28.41 +
   28.42 +typedef struct SACounts {
   28.43 +    u64 bytes;
   28.44 +    u64 packets;
   28.45 +    u32 integrity_failures;
   28.46 +} SACounts;
   28.47 +
   28.48 +typedef struct SAReplay {
   28.49 +    int replay;
   28.50 +    u32 send_seq;
   28.51 +    u32 recv_seq;
   28.52 +    u32 bitmap;
   28.53 +    u32 replay_window;
   28.54 +} SAReplay;
   28.55 +
   28.56 +typedef struct SAKey {
   28.57 +    char name[CRYPTO_MAX_ALG_NAME];
   28.58 +    int bits;
   28.59 +    char key[CRYPTO_MAX_KEY_BYTES];
   28.60 +} SAKey;
   28.61 +
   28.62 +typedef struct SAKeying {
   28.63 +    u8 state;
   28.64 +    u8 dying;
   28.65 +} SAKeying;
   28.66 +
   28.67 +typedef struct SAIdent {
   28.68 +    u32 id;
   28.69 +    u32 spi;
   28.70 +    u32 addr;
   28.71 +    u32 protocol;
   28.72 +} SAIdent;
   28.73 +
   28.74 +struct SAType;
   28.75 +
   28.76 +/** Security association (SA). */
   28.77 +typedef struct SAState {
   28.78 +    atomic_t refcount; /* Managed by SAState_incref()/SAState_decref(). */
   28.79 +    spinlock_t lock;
   28.80 +    /** Identifier. */
   28.81 +    struct SAIdent ident;
   28.82 +    /** Security flags. */
   28.83 +    int security;
   28.84 +    /** Keying state. */
   28.85 +    struct SAKeying keying;
   28.86 +    /** Byte counts etc. */
   28.87 +    struct SACounts counts;
   28.88 +    /** Byte limits etc. */
   28.89 +    struct SALimits limits;
   28.90 +    /** Replay protection. */
   28.91 +    struct SAReplay replay;
   28.92 +    /** Digest algorithm. */
   28.93 +    struct SAKey digest;
   28.94 +    /** Cipher algorithm. */
   28.95 +    struct SAKey cipher;
   28.96 +    /** Compress algorithm. */
   28.97 +    struct SAKey compress;
   28.98 +    /** SA type (ESP, AH). */
   28.99 +    struct SAType *type;
  28.100 +    /** Data for the SA type to use. */
  28.101 +    void *data;
  28.102 +} SAState;
  28.103 +    
  28.104 +typedef struct SAType {
  28.105 +    char *name;
  28.106 +    int protocol;
  28.107 +    int (*init)(SAState *state, void *args);
  28.108 +    void (*fini)(SAState *state);
  28.109 +    int (*recv)(SAState *state, struct sk_buff *skb);
  28.110 +    int (*send)(SAState *state, struct sk_buff *skb, Tunnel *tunnel);
  28.111 +    u32 (*size)(SAState *state, int size);
  28.112 +} SAType;
  28.113 +
  28.114 +/** Information needed to create an SA.
  28.115 + * Unused algorithms have zero key size.
  28.116 + */
  28.117 +typedef struct SAInfo {
  28.118 +    /** Identifier. */
  28.119 +    SAIdent ident;
  28.120 +    /** Security flags. */
  28.121 +    int security;
  28.122 +    /** Digest algorithm and key. */
  28.123 +    SAKey digest;
  28.124 +    /** Cipher algorithm and key. */
  28.125 +    SAKey cipher;
  28.126 +    /** Compress algorithm and key. */
  28.127 +    SAKey compress;
  28.128 +    /** SA lifetime limits. */
  28.129 +    SALimits limits;
  28.130 +    /** Replay protection window. */
  28.131 +    int replay_window;
  28.132 +} SAInfo;
  28.133 +
  28.134 +enum sa_alg_type {
  28.135 +    SA_ALG_DIGEST = 1,
  28.136 +    SA_ALG_CIPHER = 2,
  28.137 +    SA_ALG_COMPRESS = 3,
  28.138 +};
  28.139 +
  28.140 +extern int SAType_add(SAType *type);
  28.141 +extern int SAType_del(SAType *type);
  28.142 +extern int SAType_get(int protocol, SAType **type);
  28.143 +
  28.144 +extern int sa_table_init(void);
  28.145 +extern void sa_table_exit(void);
  28.146 +extern int sa_table_delete(SAState *state);
  28.147 +extern int sa_table_add(SAState *state);
  28.148 +extern SAState * sa_table_lookup_spi(u32 spi, u32 protocol, u32 addr);
  28.149 +extern SAState * sa_table_lookup_id(u32 id);
  28.150 +
  28.151 +/** Increment reference count.
  28.152 + *
  28.153 + * @param sa security association (may be null)
  28.154 + */
  28.155 +static inline void SAState_incref(SAState *sa){
  28.156 +    if(!sa) return;
  28.157 +    atomic_inc(&sa->refcount);
  28.158 +}
  28.159 +
  28.160 +/** Decrement reference count, freeing if zero.
  28.161 + * The type's fini() hook is only called if a type (and hook) has been set.
  28.162 + * @param sa security association (may be null)
  28.163 + */
  28.164 +static inline void SAState_decref(SAState *sa){
  28.165 +    if(!sa) return;
  28.166 +    if(atomic_dec_and_test(&sa->refcount)){
  28.167 +        if(sa->type && sa->type->fini) sa->type->fini(sa); // type is NULL until SAType_get() succeeds
  28.168 +        kfree(sa);
  28.169 +    }
  28.170 +}
  28.171 +
  28.172 +extern SAState *SAState_alloc(void);
  28.173 +extern int SAState_init(SAIdent *id, SAState **statep);
  28.174 +extern int SAState_create(SAInfo *info, SAState **statep);
  28.175 +
  28.176 +static inline int SAState_send(SAState *sa, struct sk_buff *skb, Tunnel *tunnel){
  28.177 +    return sa->type->send(sa, skb, tunnel);
  28.178 +}
  28.179 +
  28.180 +static inline int SAState_recv(SAState *sa, struct sk_buff *skb){
  28.181 +    return sa->type->recv(sa, skb);
  28.182 +}
  28.183 +
  28.184 +static inline int SAState_size(SAState *sa, int n){
  28.185 +    return sa->type->size(sa, n);
  28.186 +}
  28.187 +
  28.188 +extern int sa_create(int security, u32 spi, u32 protocol, u32 addr, SAState **sa);
  28.189 +extern int sa_set(SAInfo *info, int update, SAState **val);
  28.190 +extern int sa_delete(int id);
  28.191 +
  28.192 +enum {
  28.193 +    SA_AUTH = 1,
  28.194 +    SA_CONF = 2
  28.195 +};
  28.196 +
  28.197 +enum {
  28.198 +    SA_STATE_ACQUIRE = 1,
  28.199 +    SA_STATE_VALID   = 2,
  28.200 +};
  28.201 +
  28.202 +#endif /* !__VNET_SA_H__ */
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/tools/vnet/vnet-module/sa_algorithm.c	Mon Nov 22 16:49:15 2004 +0000
    29.3 @@ -0,0 +1,367 @@
    29.4 +/* 
    29.5 + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
    29.6 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    29.7 + *
    29.8 + * This program is free software; you can redistribute it and/or modify
    29.9 + * it under the terms of the GNU General Public License as published by the 
   29.10 + * Free Software Foundation; either version 2 of the License, or (at your
   29.11 + * option) any later version.
   29.12 + * 
   29.13 + * This program is distributed in the hope that it will be useful, but
   29.14 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   29.15 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   29.16 + * for more details.
   29.17 + *
   29.18 + * You should have received a copy of the GNU General Public License along
   29.19 + * with this program; if not, write to the Free software Foundation, Inc.,
   29.20 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   29.21 + *
   29.22 + */
   29.23 +#include <linux/config.h>
   29.24 +#include <linux/kernel.h>
   29.25 +#include <linux/string.h>
   29.26 +#include <linux/crypto.h>
   29.27 +#include <linux/sched.h>
   29.28 +//#include <asm/softirq.h>
   29.29 +
   29.30 +#include <sa_algorithm.h>
   29.31 +
   29.32 +#define MODULE_NAME "IPSEC"
   29.33 +#define DEBUG 1
   29.34 +#undef DEBUG
   29.35 +#include "debug.h"
   29.36 +
   29.37 +/** @file Tables of supported IPSEC algorithms.
   29.38 + * Has tables for digests, ciphers and compression algorithms.
   29.39 + */
   29.40 +
   29.41 +/*
   29.42 + * Algorithms supported by IPsec.  These entries contain properties which
   29.43 + * are used in key negotiation and sa processing, and are used to verify
   29.44 + * that instantiated crypto transforms have correct parameters for IPsec
   29.45 + * purposes.
   29.46 + */
   29.47 +
   29.48 +/** Digests. */
   29.49 +static SAAlgorithm digest_alg[] = {
   29.50 +    {
   29.51 +        .name = "digest_null",
   29.52 +        .info = {
   29.53 +            .digest = {
   29.54 +                .icv_truncbits = 0,
   29.55 +                .icv_fullbits = 0,
   29.56 +            }
   29.57 +        },
   29.58 +        .alg = {
   29.59 +            .sadb_alg_id = SADB_X_AALG_NULL,
   29.60 +            .sadb_alg_ivlen = 0,
   29.61 +            .sadb_alg_minbits = 0,
   29.62 +            .sadb_alg_maxbits = 0
   29.63 +        }
   29.64 +    },
   29.65 +    {
   29.66 +	.name = "md5",
   29.67 +	.info = { .digest = {
   29.68 +            .icv_truncbits = 96,
   29.69 +            .icv_fullbits = 128,
   29.70 +        } },
   29.71 +        .alg = {
   29.72 +            .sadb_alg_id = SADB_AALG_MD5HMAC,
   29.73 +            .sadb_alg_ivlen = 0,
   29.74 +            .sadb_alg_minbits = 128,
   29.75 +            .sadb_alg_maxbits = 128
   29.76 +	}
   29.77 +    },
   29.78 +    {
   29.79 +	.name = "sha1",
   29.80 +	.info = {
   29.81 +            .digest = {
   29.82 +                .icv_truncbits = 96,
   29.83 +                .icv_fullbits = 160,
   29.84 +            }
   29.85 +	},
   29.86 +	.alg = {
   29.87 +            .sadb_alg_id = SADB_AALG_SHA1HMAC,
   29.88 +            .sadb_alg_ivlen = 0,
   29.89 +            .sadb_alg_minbits = 160,
   29.90 +            .sadb_alg_maxbits = 160
   29.91 +	}
   29.92 +    },
   29.93 +    {
   29.94 +	.name = "sha256",
   29.95 +	.info = {
   29.96 +            .digest = {
   29.97 +                .icv_truncbits = 128,
   29.98 +                .icv_fullbits = 256,
   29.99 +            }
  29.100 +	},
  29.101 +	.alg = {
  29.102 +            .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC,
  29.103 +            .sadb_alg_ivlen = 0,
  29.104 +            .sadb_alg_minbits = 256,
  29.105 +            .sadb_alg_maxbits = 256
  29.106 +	}
  29.107 +    },
  29.108 +/*     { */
  29.109 +/*         .name = "ripemd160", */
  29.110 +/*         .info = { */
  29.111 +/*             .digest = { */
  29.112 +/*                 .icv_truncbits = 96, */
  29.113 +/*                 .icv_fullbits = 160, */
  29.114 +/*             } */
  29.115 +/* 	}, */
  29.116 +/*         .alg = { */
  29.117 +/*             .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, */
  29.118 +/*             .sadb_alg_ivlen = 0, */
  29.119 +/*             .sadb_alg_minbits = 160, */
  29.120 +/*             .sadb_alg_maxbits = 160 */
  29.121 +/* 	} */
  29.122 +/*     }, */
  29.123 +    { /* Terminator */ }
  29.124 +};
  29.125 +
  29.126 +/** Ciphers. */
  29.127 +static SAAlgorithm cipher_alg[] = {
  29.128 +    {
  29.129 +	.name = "cipher_null",
  29.130 +        .info = {
  29.131 +            .cipher = {
  29.132 +                .blockbits = 8,
  29.133 +                .defkeybits = 0,
  29.134 +            }
  29.135 +	},
  29.136 +        .alg = {
  29.137 +            .sadb_alg_id =	SADB_EALG_NULL,
  29.138 +            .sadb_alg_ivlen = 0,
  29.139 +            .sadb_alg_minbits = 0,
  29.140 +            .sadb_alg_maxbits = 0
  29.141 +	}
  29.142 +    },
  29.143 +    {
  29.144 +        .name = "des",
  29.145 +        .info = {
  29.146 +            .cipher = {
  29.147 +                .blockbits = 64,
  29.148 +                .defkeybits = 64,
  29.149 +            }
  29.150 +	},
  29.151 +        .alg = {
  29.152 +            .sadb_alg_id = SADB_EALG_DESCBC,
  29.153 +            .sadb_alg_ivlen = 8,
  29.154 +            .sadb_alg_minbits = 64,
  29.155 +            .sadb_alg_maxbits = 64
  29.156 +	}
  29.157 +    },
  29.158 +    {
  29.159 +	.name = "des3_ede",
  29.160 +	.info = {
  29.161 +            .cipher = {
  29.162 +                .blockbits = 64,
  29.163 +                .defkeybits = 192,
  29.164 +            }
  29.165 +	},
  29.166 +        .alg = {
  29.167 +            .sadb_alg_id = SADB_EALG_3DESCBC,
  29.168 +            .sadb_alg_ivlen = 8,
  29.169 +            .sadb_alg_minbits = 192,
  29.170 +            .sadb_alg_maxbits = 192
  29.171 +	}
  29.172 +    },
  29.173 +/*     { */
  29.174 +/* 	.name = "cast128", */ //cast5?
  29.175 +/* 	.info = { */
  29.176 +/*             .cipher = { */
  29.177 +/*                 .blockbits = 64, */
  29.178 +/*                 .defkeybits = 128, */
  29.179 +/*             } */
  29.180 +/* 	}, */
  29.181 +/* 	.alg = { */
  29.182 +/*             .sadb_alg_id = SADB_X_EALG_CASTCBC, */
  29.183 +/*             .sadb_alg_ivlen = 8, */
  29.184 +/*             .sadb_alg_minbits = 40, */
  29.185 +/*             .sadb_alg_maxbits = 128 */
  29.186 +/* 	} */
  29.187 +/*     }, */
  29.188 +    {
  29.189 +	.name = "blowfish",
  29.190 +        .info = {
  29.191 +            .cipher = {
  29.192 +                .blockbits = 64,
  29.193 +                .defkeybits = 128,
  29.194 +            }
  29.195 +	},
  29.196 +	.alg = {
  29.197 +            .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC,
  29.198 +            .sadb_alg_ivlen = 8,
  29.199 +            .sadb_alg_minbits = 40,
  29.200 +            .sadb_alg_maxbits = 448
  29.201 +	}
  29.202 +    },
  29.203 +    {
  29.204 +	.name = "aes",
  29.205 +	.info = {
  29.206 +            .cipher = {
  29.207 +                .blockbits = 128,
  29.208 +                .defkeybits = 128,
  29.209 +            }
  29.210 +	},
  29.211 +	.alg = {
  29.212 +            .sadb_alg_id = SADB_X_EALG_AESCBC,
  29.213 +            .sadb_alg_ivlen = 8,
  29.214 +            .sadb_alg_minbits = 128,
  29.215 +            .sadb_alg_maxbits = 256
  29.216 +	}
  29.217 +    },
  29.218 +    { /* Terminator */ }
  29.219 +};
  29.220 +
  29.221 +/** Compression algorithms known to this module. The table ends with a zeroed terminator entry. */
  29.222 +static SAAlgorithm compress_alg[] = {
  29.223 +    { /* .available is not set here, so it starts as 0 until sa_algorithm_probe() updates it */
  29.224 +	.name = "deflate",
  29.225 +	.info = {
  29.226 +            .compress = {
  29.227 +                .threshold = 90,
  29.228 +            }
  29.229 +	},
  29.230 +	.alg = { .sadb_alg_id = SADB_X_CALG_DEFLATE } /* id matched by sa_compress_by_id() */
  29.231 +    },
  29.232 +/*     { */
  29.233 +/* 	.name = "lzs", */
  29.234 +/* 	.info = { */
  29.235 +/*             .compress = { */
  29.236 +/*                 .threshold = 90, */
  29.237 +/*             } */
  29.238 +/* 	}, */
  29.239 +/* 	.alg = { .sadb_alg_id = SADB_X_CALG_LZS } */
  29.240 +/*     }, */
  29.241 +/*     { */
  29.242 +/* 	.name = "lzjh", */
  29.243 +/* 	.info = { */
  29.244 +/*             .compress = { */
  29.245 +/*                 .threshold = 50, */
  29.246 +/*             } */
  29.247 +/* 	}, */
  29.248 +/* 	.alg = { .sadb_alg_id = SADB_X_CALG_LZJH } */
  29.249 +/*     }, */
  29.250 +    { /* Terminator: .name is NULL, which is where the lookup and probe loops stop. */ }
  29.251 +};
  29.252 +
  29.253 +static SAAlgorithm *sa_algorithm_by_id(SAAlgorithm *algo, int alg_id) { /* Find the entry with sadb_alg_id == alg_id in a table terminated by a NULL name. */
  29.254 +    for( ; algo && algo->name; algo++){ /* a NULL table pointer is tolerated: the loop simply does not run */
  29.255 +        if (algo->alg.sadb_alg_id == alg_id) {
  29.256 +            return (algo->available ? algo : NULL); /* first match decides: a known but unavailable algorithm yields NULL */
  29.257 +        }
  29.258 +    }
  29.259 +    return NULL; /* no entry has this id */
  29.260 +}
  29.261 +
  29.262 +
  29.263 +static SAAlgorithm *sa_algorithm_by_name(SAAlgorithm *algo, char *name) { /* Find the entry whose name equals 'name' (exact strcmp match) in a table terminated by a NULL name. */
  29.264 +	if (!name) return NULL; /* a NULL name never matches */
  29.265 +	for( ; algo && algo->name; algo++){
  29.266 +		if (strcmp(name, algo->name) == 0) {
  29.267 +                    return (algo->available ? algo : NULL); /* first match decides: a known but unavailable algorithm yields NULL */
  29.268 +                }
  29.269 +	}
  29.270 +	return NULL; /* no entry has this name */
  29.271 +}
  29.272 +
  29.273 +SAAlgorithm *sa_digest_by_id(int alg_id) { /* Look up a digest in digest_alg by sadb_alg_id. */
  29.274 +    return sa_algorithm_by_id(digest_alg, alg_id); /* NULL if unknown or not marked available */
  29.275 +}
  29.276 +
  29.277 +SAAlgorithm *sa_cipher_by_id(int alg_id) { /* Look up a cipher in cipher_alg by sadb_alg_id. */
  29.278 +    return sa_algorithm_by_id(cipher_alg, alg_id); /* NULL if unknown or not marked available */
  29.279 +}
  29.280 +
  29.281 +SAAlgorithm *sa_compress_by_id(int alg_id) { /* Look up a compressor in compress_alg by sadb_alg_id. */
  29.282 +    return sa_algorithm_by_id(compress_alg, alg_id); /* NULL if unknown or not marked available */
  29.283 +}
  29.284 +
  29.285 +SAAlgorithm *sa_digest_by_name(char *name) { /* Look up a digest in digest_alg by name. */
  29.286 +    return sa_algorithm_by_name(digest_alg, name); /* NULL if name is NULL, unknown, or not marked available */
  29.287 +}
  29.288 +
  29.289 +SAAlgorithm *sa_cipher_by_name(char *name) { /* Look up a cipher in cipher_alg by name. */
  29.290 +    return sa_algorithm_by_name(cipher_alg, name); /* NULL if name is NULL, unknown, or not marked available */
  29.291 +}
  29.292 +
  29.293 +SAAlgorithm *sa_compress_by_name(char *name) { /* Look up a compressor in compress_alg by name. */
  29.294 +    return sa_algorithm_by_name(compress_alg, name); /* NULL if name is NULL, unknown, or not marked available */
  29.295 +}
  29.296 +
  29.297 +SAAlgorithm *sa_digest_by_index(unsigned int idx) { /* Address of the idx-th digest_alg entry; the 'available' flag is not consulted. */
  29.298 +    return digest_alg + idx; /* no bounds check: callers must stop at the entry whose name is NULL */
  29.299 +}
  29.300 +
  29.301 +SAAlgorithm *sa_cipher_by_index(unsigned int idx) { /* Address of the idx-th cipher_alg entry; the 'available' flag is not consulted. */
  29.302 +    return cipher_alg + idx; /* no bounds check: callers must stop at the entry whose name is NULL */
  29.303 +}
  29.304 +
  29.305 +SAAlgorithm *sa_compress_by_index(unsigned int idx) { /* Address of the idx-th compress_alg entry; the 'available' flag is not consulted. */
  29.306 +    return compress_alg + idx; /* no bounds check: callers must stop at the entry whose name is NULL */
  29.307 +}
  29.308 +
  29.309 +static void sa_algorithm_probe(SAAlgorithm *algo){ /* Refresh the 'available' flag of every entry in a NULL-name-terminated table. */
  29.310 +    int status;
  29.311 +    dprintf("> algo=%p\n", algo); 
  29.312 +    for( ; algo && algo->name; algo++){
  29.313 +        dprintf("> algorithm %s...\n", algo->name);
  29.314 +        status = crypto_alg_available(algo->name, 0); /* ask the crypto API, by table name, whether the algorithm can be used */
  29.315 +        dprintf("> algorithm %s status=%d\n",algo->name, status); 
  29.316 +        if (algo->available != status){ /* only write the flag when it actually changes */
  29.317 +            algo->available = status; /* int stored into a u8 field */
  29.318 +        }
  29.319 +    }
  29.320 +    dprintf("<\n"); 
  29.321 +}
  29.322 +
  29.323 +/** Crypto api is broken. When an unregistered algorithm is requested it
  29.324 + * tries to load a module of the same name. But not all algorithms are
  29.325 + * defined by modules of the same name. So the modules listed here are requested explicitly.
  29.326 + */
  29.327 +static char *crypto_modules[] = { /* module names passed one by one to request_module(); commented-out names are skipped */
  29.328 +    "aes",
  29.329 +    //"arc4",
  29.330 +    "blowfish",
  29.331 +    //"cast5",
  29.332 +    //"cast6",
  29.333 +    "crypto_null",
  29.334 +    "des",
  29.335 +    //"md4",
  29.336 +    "md5",
  29.337 +    //"serpent",
  29.338 +    "sha1",
  29.339 +    "sha256",
  29.340 +    //"sha512",
  29.341 +    //"twofish",
  29.342 +    NULL /* end marker: iteration over this list stops at the first NULL */
  29.343 +};
  29.344 +
  29.345 +#include <linux/kmod.h>
  29.346 +
  29.347 +static void sa_module_probe(char **modules){ /* Request loading of every module named in a NULL-terminated list. */
  29.348 +    char **p;
  29.349 +    dprintf(">\n");
  29.350 +    for(p = modules; *p; p++){ /* 'modules' itself must be non-NULL; only the end marker is checked */
  29.351 +        dprintf("> %s\n", *p);
  29.352 +	request_module(*p); /* result is ignored: a failed load is not reported here */
  29.353 +    }
  29.354 +    dprintf("<\n");
  29.355 +}
  29.356 +
  29.357 +/**
  29.358 + * Probe for the availability of crypto algorithms, and set the available
  29.359 + * flag for any algorithms found on the system.  This is typically called by
  29.360 + * pfkey during userspace SA add, update or register.
  29.361 + */
  29.362 +void sa_algorithm_probe_all(void){
  29.363 +    dprintf("> \n"); 
  29.364 +    //BUG_ON(in_softirq());
  29.365 +    sa_module_probe(crypto_modules); /* request the modules first, before the tables are probed */
  29.366 +    sa_algorithm_probe(digest_alg); /* then refresh the flags of each table in turn */
  29.367 +    sa_algorithm_probe(cipher_alg);
  29.368 +    sa_algorithm_probe(compress_alg);
  29.369 +    dprintf("<\n"); 
  29.370 +}
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/tools/vnet/vnet-module/sa_algorithm.h	Mon Nov 22 16:49:15 2004 +0000
    30.3 @@ -0,0 +1,63 @@
    30.4 +/*
    30.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    30.6 + *
    30.7 + * This program is free software; you can redistribute it and/or modify
    30.8 + * it under the terms of the GNU General Public License as published by the 
    30.9 + * Free Software Foundation; either version 2 of the License, or (at your
   30.10 + * option) any later version.
   30.11 + * 
   30.12 + * This program is distributed in the hope that it will be useful, but
   30.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   30.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   30.15 + * for more details.
   30.16 + *
   30.17 + * You should have received a copy of the GNU General Public License along
   30.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   30.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   30.20 + *
   30.21 + */
   30.22 +#ifndef __VNET_SA_ALGORITHM_H__
   30.23 +#define __VNET_SA_ALGORITHM_H__
   30.24 +
   30.25 +#include <linux/types.h>
   30.26 +#include <linux/pfkeyv2.h>
   30.27 +
   30.28 +typedef struct SADigestInfo { /* digest-specific member of SAAlgorithm.info */
   30.29 +    u16 icv_truncbits; /* presumably the truncated ICV length in bits - confirm against the digest table */
   30.30 +    u16 icv_fullbits; /* presumably the full ICV length in bits - confirm against the digest table */
   30.31 +} SADigestInfo;
   30.32 +
   30.33 +typedef struct SACipherInfo { /* cipher-specific member of SAAlgorithm.info */
   30.34 +    u16 blockbits; /* presumably the cipher block size in bits */
   30.35 +    u16 defkeybits; /* presumably the default key length in bits */
   30.36 +} SACipherInfo;
   30.37 +
   30.38 +typedef struct SACompressInfo { /* compressor-specific member of SAAlgorithm.info */
   30.39 +    u16 threshold; /* NOTE(review): unit and meaning are not shown in this header - confirm with the users of this field */
   30.40 +} SACompressInfo;
   30.41 +
   30.42 +typedef struct SAAlgorithm { /* one entry of an algorithm table (digest, cipher or compressor) */
   30.43 +    char *name; /* algorithm name; a NULL name marks the end of a table */
   30.44 +    u8 available; /* non-zero when the algorithm may be returned by the by_id/by_name lookups */
   30.45 +    union { /* which member is meaningful depends on the table the entry belongs to */
   30.46 +        SADigestInfo digest;
   30.47 +        SACipherInfo cipher;
   30.48 +        SACompressInfo compress;
   30.49 +    } info;
   30.50 +    struct sadb_alg alg; /* pfkey algorithm descriptor; its sadb_alg_id is the key used by the by_id lookups */
   30.51 +} SAAlgorithm;
   30.52 +
   30.53 +extern SAAlgorithm *sa_digest_by_id(int alg_id);
   30.54 +extern SAAlgorithm *sa_cipher_by_id(int alg_id);
   30.55 +extern SAAlgorithm *sa_compress_by_id(int alg_id);
   30.56 +extern SAAlgorithm *sa_digest_by_name(char *name);
   30.57 +extern SAAlgorithm *sa_cipher_by_name(char *name);
   30.58 +extern SAAlgorithm *sa_compress_by_name(char *name);
   30.59 +extern SAAlgorithm *sa_digest_by_index(unsigned int idx);
   30.60 +extern SAAlgorithm *sa_cipher_by_index(unsigned int idx);
   30.61 +extern SAAlgorithm *sa_compress_by_index(unsigned int idx);
   30.62 +extern void sa_algorithm_probe_all(void);
   30.63 +
   30.64 +#define MAX_KEY_BITS 512
   30.65 +
   30.66 +#endif /* ! __VNET_SA_ALGORITHM_H__ */
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/tools/vnet/vnet-module/skb_context.c	Mon Nov 22 16:49:15 2004 +0000
    31.3 @@ -0,0 +1,92 @@
    31.4 +/*
    31.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    31.6 + *
    31.7 + * This program is free software; you can redistribute it and/or modify
    31.8 + * it under the terms of the GNU General Public License as published by the 
    31.9 + * Free Software Foundation; either version 2 of the License, or (at your
   31.10 + * option) any later version.
   31.11 + * 
   31.12 + * This program is distributed in the hope that it will be useful, but
   31.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   31.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   31.15 + * for more details.
   31.16 + *
   31.17 + * You should have received a copy of the GNU General Public License along
   31.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   31.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   31.20 + *
   31.21 + */
   31.22 +#include <linux/config.h>
   31.23 +#include <linux/kernel.h>
   31.24 +#include <linux/skbuff.h>
   31.25 +#include <linux/slab.h>
   31.26 +
   31.27 +#include <skb_context.h>
   31.28 +
   31.29 +#define MODULE_NAME "VNET"
   31.30 +#define DEBUG 1
   31.31 +#undef DEBUG
   31.32 +#include "debug.h"
   31.33 +
   31.34 +SkbContext *SkbContext_create(u32 vnet, u32 addr, int protocol, void *data, /* Allocate a context holding the given values. */
   31.35 +                              void (*free_fn)(SkbContext *)){ /* free_fn, if non-NULL, is called by SkbContext_free() */
   31.36 +    SkbContext *context = NULL;
   31.37 +
   31.38 +    context = kmalloc(sizeof(SkbContext), GFP_ATOMIC);
   31.39 +    if(!context) goto exit; /* allocation failed: NULL is returned */
   31.40 +    context->vnet = vnet;
   31.41 +    context->addr = addr;
   31.42 +    context->protocol = protocol;
   31.43 +    context->data = data;
   31.44 +    context->free_fn = free_fn;
   31.45 +    context->next = NULL; /* not linked to any other context yet */
   31.46 +    atomic_set(&context ->refcount, 1); /* the caller receives the single initial reference */
   31.47 +  exit:
   31.48 +    return context;
   31.49 +}
   31.50 +                                       
   31.51 +void SkbContext_free(SkbContext *context){ /* Release a context regardless of its refcount; NULL is ignored. */
   31.52 +    if(!context) return;
   31.53 +    if(context->next) SkbContext_decref(context->next); /* drop this context's reference on the next one in the chain */
   31.54 +    if(context->free_fn) context->free_fn(context); /* the callback runs while all fields are still intact */
   31.55 +    context->vnet = 0; /* fields are cleared before the memory is handed back */
   31.56 +    context->addr = 0;
   31.57 +    context->protocol = 0;
   31.58 +    context->free_fn = NULL;
   31.59 +    context->data = NULL;
   31.60 +    context->next = NULL;
   31.61 +    kfree(context);
   31.62 +}
   31.63 +
   31.64 +int SkbContext_push(SkbContext **val, u32 vnet, u32 addr, int protocol, /* Create a context and make it the new head of the chain at *val. */
   31.65 +                    void *data, void (*free_fn)(SkbContext *)){ /* returns 0, or -ENOMEM if the context could not be created */
   31.66 +    int err = 0;
   31.67 +    SkbContext *context = NULL;
   31.68 +
   31.69 +    dprintf("> vnet=%u addr=%u.%u.%u.%u protocol=%d\n",
   31.70 +            vnet, NIPQUAD(addr), protocol);
   31.71 +    context = SkbContext_create(vnet, addr, protocol, data, free_fn);
   31.72 +    if(!context){
   31.73 +        err = -ENOMEM; /* *val is left unchanged on failure */
   31.74 +        goto exit;
   31.75 +    }
   31.76 +    context->next = *val; /* old head becomes the successor; no incref is done on it here */
   31.77 +    *val = context;
   31.78 +  exit:
   31.79 +    dprintf("< err=%d\n", err);
   31.80 +    return err;
   31.81 +}
   31.82 +
   31.83 +int skb_push_context(struct sk_buff *skb, u32 vnet, u32 addr, int protocol, /* Currently a stub: nothing is attached to the skb. */
   31.84 +                     void *data, void (*free_fn)(SkbContext *)){ /* all arguments except skb (debug print only) are unused */
   31.85 +    int err = 0; /* never modified below, so the function always returns 0 */
   31.86 +    //SkbContext *ctxt = SKB_CONTEXT(skb);
   31.87 +    dprintf("> skb=%p\n", skb);
   31.88 +
   31.89 +    //err = SkbContext_push(&ctxt, vnet, addr, protocol, data, free_fn); //todo fixme
   31.90 +    //SKB_CONTEXT(skb) = ctxt;//todo fixme
   31.91 +    dprintf("< err=%d\n", err);
   31.92 +    return err;
   31.93 +}
   31.94 +                                       
   31.95 +
    32.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.2 +++ b/tools/vnet/vnet-module/skb_context.h	Mon Nov 22 16:49:15 2004 +0000
    32.3 @@ -0,0 +1,76 @@
    32.4 +/*
    32.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    32.6 + *
    32.7 + * This program is free software; you can redistribute it and/or modify
    32.8 + * it under the terms of the GNU General Public License as published by the 
    32.9 + * Free Software Foundation; either version 2 of the License, or (at your
   32.10 + * option) any later version.
   32.11 + * 
   32.12 + * This program is distributed in the hope that it will be useful, but
   32.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   32.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   32.15 + * for more details.
   32.16 + *
   32.17 + * You should have received a copy of the GNU General Public License along
   32.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   32.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   32.20 + *
   32.21 + */
   32.22 +
   32.23 +#ifndef __VNET_SKB_CONTEXT_H__
   32.24 +#define __VNET_SKB_CONTEXT_H__
   32.25 +
   32.26 +#include <linux/config.h>
   32.27 +#include <linux/kernel.h>
   32.28 +#include <asm/atomic.h>
   32.29 +#include <linux/types.h>
   32.30 +
   32.31 +/** Structure used to record inbound processing path for skbs.
   32.32 + * For example, the ETHERIP protocol handler can use this to
   32.33 + * tell whether an inbound packet came through IPSEC ESP or not.
   32.34 + * Contexts are reference counted and can be chained through 'next'.
   32.35 +typedef struct SkbContext {
   32.36 +    u32 vnet;
   32.37 +    u32 addr;
   32.38 +    int protocol;
   32.39 +    void *data;
   32.40 +    void (*free_fn)(struct SkbContext *);
   32.41 +    atomic_t refcount;
   32.42 +    struct SkbContext *next;
   32.43 +} SkbContext;
   32.44 +
   32.45 +/** Decrement the reference count, freeing if zero.
   32.46 + *
   32.47 + * @param context context (may be null, in which case nothing happens)
   32.48 + */
   32.49 +static inline void SkbContext_decref(SkbContext *context){
   32.50 +    extern void SkbContext_free(SkbContext *context); /* declared locally because this header has no file-scope prototype for it */
   32.51 +    if(!context) return;
   32.52 +    if(atomic_dec_and_test(&context->refcount)){ /* true only for the decrement that reaches zero */
   32.53 +        SkbContext_free(context);
   32.54 +    }
   32.55 +}
   32.56 +
   32.57 +/** Increment the reference count.
   32.58 + *
   32.59 + * @param context context (may be null, in which case nothing happens)
   32.60 + */
   32.61 +static inline void SkbContext_incref(SkbContext *context){
   32.62 +    if(!context) return;
   32.63 +    atomic_inc(&context->refcount); /* pair each incref with a later SkbContext_decref() */
   32.64 +}
   32.65 +
   32.66 +extern SkbContext *SkbContext_create(u32 vnet, u32 addr, int protocol, void *data,
   32.67 +                                     void (*free_fn)(SkbContext *));
   32.68 +
   32.69 +extern int SkbContext_push(SkbContext **val, u32 vnet, u32 addr, int protocol,
   32.70 +                           void *data, void (*free_fn)(SkbContext *));
   32.71 +
   32.72 +struct sk_buff;
   32.73 +extern int skb_push_context(struct sk_buff *skb, u32 vnet, u32 addr, int protocol,
   32.74 +                            void *data, void (*free_fn)(SkbContext *));
   32.75 +
   32.76 +//todo: fixme - this treats the start of the skb's cb[] area as an SkbContext itself, not as a pointer to one
   32.77 +#define SKB_CONTEXT(_skb) ((SkbContext *)(&(_skb)->cb[0]))
   32.78 +
   32.79 +#endif /* !__VNET_SKB_CONTEXT_H__ */ 
    33.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.2 +++ b/tools/vnet/vnet-module/skb_util.c	Mon Nov 22 16:49:15 2004 +0000
    33.3 @@ -0,0 +1,515 @@
    33.4 +/*
    33.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    33.6 + *
    33.7 + * This program is free software; you can redistribute it and/or modify
    33.8 + * it under the terms of the GNU General Public License as published by the 
    33.9 + * Free Software Foundation; either version 2 of the License, or (at your
   33.10 + * option) any later version.
   33.11 + * 
   33.12 + * This program is distributed in the hope that it will be useful, but
   33.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   33.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   33.15 + * for more details.
   33.16 + *
   33.17 + * You should have received a copy of the GNU General Public License along
   33.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   33.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   33.20 + *
   33.21 + */
   33.22 +#include <linux/config.h>
   33.23 +#include <linux/module.h>
   33.24 +#include <linux/kernel.h>
   33.25 +#include <linux/init.h>
   33.26 +#include <linux/version.h>
   33.27 +
   33.28 +#include <asm/scatterlist.h>
   33.29 +#include <linux/crypto.h>
   33.30 +#include <linux/pfkeyv2.h>
   33.31 +#include <linux/random.h>
   33.32 +
   33.33 +#include <linux/net.h>
   33.34 +#include <linux/in.h>
   33.35 +#include <linux/inet.h>
   33.36 +#include <linux/netdevice.h>
   33.37 +#include <linux/tcp.h>
   33.38 +#include <linux/udp.h>
   33.39 +
   33.40 +#include <net/ip.h>
   33.41 +#include <net/protocol.h>
   33.42 +#include <net/route.h>
   33.43 +#include <linux/skbuff.h>
   33.44 +
   33.45 +#include <varp.h>
   33.46 +#include <skb_util.h>
   33.47 +
   33.48 +#define MODULE_NAME "VNET"
   33.49 +#define DEBUG 1
   33.50 +#undef DEBUG
   33.51 +#include "debug.h"
   33.52 +
   33.53 +static const int DEBUG_SCATTERLIST = 0;
   33.54 +static const int DEBUG_SKB = 0;
   33.55 +
   33.56 +//============================================================================
   33.57 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) /* on 2.6 and later the macro does nothing */
   33.58 +#define SET_SCATTER_ADDR(sg, addr) do{} while(0)
   33.59 +#else /* older kernels: store addr in the scatterlist entry's address field */
   33.60 +#define SET_SCATTER_ADDR(sg, addr) (sg).address = (addr)
   33.61 +#endif
   33.62 +
   33.63 +/** Make enough room in an skb for extra header and trailer.
   33.64 + *
   33.65 + * @param pskb return parameter for expanded skb (written only on success; may be skb itself)
   33.66 + * @param skb skb (freed here when a replacement is allocated; left untouched on error)
   33.67 + * @param head_n required headroom
   33.68 + * @param tail_n required tailroom
   33.69 + * @return 0 on success, -ENOMEM if a replacement skb could not be allocated
   33.70 + */
   33.71 +int skb_make_room(struct sk_buff **pskb, struct sk_buff *skb, int head_n, int tail_n){
   33.72 +    int err = 0;
   33.73 +    int has_headroom = (head_n <= skb_headroom(skb));
   33.74 +    int has_tailroom = (tail_n <= skb_tailroom(skb));
   33.75 +    int writeable = !skb_cloned(skb) && !skb_shared(skb); /* a cloned or shared skb is never modified in place */
   33.76 +
   33.77 +    dprintf("> skb=%p headroom=%d head_n=%d tailroom=%d tail_n=%d\n",
   33.78 +            skb,
   33.79 +            skb_headroom(skb), head_n,
   33.80 +            skb_tailroom(skb), tail_n);
   33.81 +    if(writeable && has_headroom && has_tailroom){
   33.82 +        // There's room! Reuse it.
   33.83 +        *pskb = skb;
   33.84 +    } else if(writeable && has_tailroom){
   33.85 +        // Tailroom, no headroom. Expand header the way GRE does.
   33.86 +        struct sk_buff *new_skb = skb_realloc_headroom(skb, head_n + 16); /* 16 bytes more headroom than requested */
   33.87 +        if(!new_skb){
   33.88 +            err = -ENOMEM;
   33.89 +            goto exit;
   33.90 +        }
   33.91 +        dev_kfree_skb(skb); /* the original is released; callers must continue with *pskb */
   33.92 +        *pskb = new_skb;
   33.93 +    } else {
   33.94 +        // No room. Expand. There may be more efficient ways to do
   33.95 +        // this, but this is simple and correct.
   33.96 +        struct sk_buff *new_skb = skb_copy_expand(skb, head_n + 16, tail_n, GFP_ATOMIC); /* also taken for non-writeable skbs */
   33.97 +        if(!new_skb){
   33.98 +            err = -ENOMEM;
   33.99 +            goto exit;
  33.100 +        }
  33.101 +        dev_kfree_skb(skb); /* the original is released; callers must continue with *pskb */
  33.102 +        *pskb = new_skb;
  33.103 +    }
  33.104 +    dprintf("> skb=%p headroom=%d head_n=%d tailroom=%d tail_n=%d\n",
  33.105 +            *pskb,
  33.106 +            skb_headroom(*pskb), head_n,
  33.107 +            skb_tailroom(*pskb), tail_n);
  33.108 +  exit:
  33.109 +    dprintf("< err=%d\n", err);
  33.110 +    return err;
  33.111 +}
  33.112 +
  33.113 +/** Copy len bytes from the kernel buffer src into an skb, starting at byte offset.
  33.114 + * Derived in the obvious way from skb_copy_bits(). Returns 0 on success, -EFAULT if the range does not fit.
  33.115 + */
  33.116 +int skb_put_bits(const struct sk_buff *skb, int offset, void *src, int len)
  33.117 +{
  33.118 +    int i, copy;
  33.119 +    int start = skb->len - skb->data_len; /* length of the linear (header) part */
  33.120 +
  33.121 +    if (offset > (int)skb->len-len) /* requested range would run past the end of the skb */
  33.122 +        goto fault;
  33.123 +
  33.124 +    /* Copy header. */
  33.125 +    if ((copy = start-offset) > 0) {
  33.126 +        if (copy > len)
  33.127 +            copy = len;
  33.128 +        memcpy(skb->data + offset, src, copy);
  33.129 +        if ((len -= copy) == 0)
  33.130 +            return 0;
  33.131 +        offset += copy;
  33.132 +        src += copy; /* arithmetic on void * (compiler extension) */
  33.133 +    }
  33.134 +
  33.135 +    for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { /* then the page fragments, in order */
  33.136 +        int end;
  33.137 +
  33.138 +        BUG_TRAP(start <= offset+len);
  33.139 +
  33.140 +        end = start + skb_shinfo(skb)->frags[i].size;
  33.141 +        if ((copy = end-offset) > 0) {
  33.142 +            u8 *vaddr;
  33.143 +
  33.144 +            if (copy > len)
  33.145 +                copy = len;
  33.146 +
  33.147 +            vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); /* map the fragment's page for the duration of the copy */
  33.148 +            memcpy(vaddr + skb_shinfo(skb)->frags[i].page_offset + offset - start,
  33.149 +                   src,
  33.150 +                   copy);
  33.151 +            kunmap_skb_frag(vaddr);
  33.152 +
  33.153 +            if ((len -= copy) == 0)
  33.154 +                return 0;
  33.155 +            offset += copy;
  33.156 +            src += copy;
  33.157 +        }
  33.158 +        start = end;
  33.159 +    }
  33.160 +
  33.161 +    if (skb_shinfo(skb)->frag_list) { /* finally any chained skbs, handled by recursion */
  33.162 +        struct sk_buff *list;
  33.163 +        
  33.164 +        for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
  33.165 +            int end;
  33.166 +            
  33.167 +            BUG_TRAP(start <= offset+len);
  33.168 +            
  33.169 +            end = start + list->len;
  33.170 +            if ((copy = end-offset) > 0) {
  33.171 +                if (copy > len)
  33.172 +                    copy = len;
  33.173 +                if (skb_put_bits(list, offset-start, src, copy)) /* offset is made relative to the chained skb */
  33.174 +                    goto fault;
  33.175 +                if ((len -= copy) == 0)
  33.176 +                    return 0;
  33.177 +                offset += copy;
  33.178 +                src += copy;
  33.179 +            }
  33.180 +            start = end;
  33.181 +        }
  33.182 +    }
  33.183 +    if (len == 0)
  33.184 +        return 0;
  33.185 +
  33.186 + fault: /* reached when the range check fails or bytes are left over after all parts were visited */
  33.187 +    return -EFAULT;
  33.188 +}
  33.189 +
  33.190 +/** Add some space to the end of a (possibly fragmented) skb.
  33.191 + *
  33.192 + * Only works with Xen output skbs.  Output skbs have 1 frag, and we
  33.193 + * add another frag for the extra space.
  33.194 + *
  33.195 + * @param skb skb
  33.196 + * @param n number of bytes to add
  33.197 + * @return 0 on success, -ENOMEM if no frag slot is free or the allocation fails
  33.198 + *
  33.199 + * @todo fixme
  33.200 + */
  33.201 +int pskb_put(struct sk_buff *skb, int n){
  33.202 +    int err = 0;
  33.203 +    if(1 || skb_is_nonlinear(skb)){ /* the '1 ||' forces this branch; the __skb_put() path below is never taken */
  33.204 +        struct skb_shared_info *info = skb_shinfo(skb);
  33.205 +        char *ptr = NULL;
  33.206 +
  33.207 +        if(info->nr_frags >= MAX_SKB_FRAGS){ /* no free slot in frags[] */
  33.208 +            err = -ENOMEM;
  33.209 +            goto exit;
  33.210 +        }
  33.211 +        ptr = kmalloc(n, GFP_ATOMIC); /* NOTE(review): kmalloc memory is attached as a page frag below - confirm it is released correctly when the skb is freed */
  33.212 +        if(!ptr){
  33.213 +            err = -ENOMEM;
  33.214 +            goto exit;
  33.215 +        }
  33.216 +        info->nr_frags++;
  33.217 +        info->frags[info->nr_frags - 1].page = virt_to_page(ptr); /* describe the new buffer as page + offset + size */
  33.218 +        info->frags[info->nr_frags - 1].page_offset = ((unsigned long)ptr & ~PAGE_MASK);
  33.219 +        info->frags[info->nr_frags - 1].size = n;
  33.220 +
  33.221 +        skb->data_len += n; /* the added bytes count as non-linear data */
  33.222 +        skb->len += n;
  33.223 +    } else {
  33.224 +        __skb_put(skb, n);
  33.225 +    }
  33.226 +  exit:
  33.227 +    if(err) dprintf("< err=%d\n", err);
  33.228 +    return err;
  33.229 +}
  33.230 +
  33.231 +/** Print some bits of an skb as hex, 16 bytes per line. Does nothing unless DEBUG_SKB is set.
  33.232 + *
  33.233 + * @param skb to print
  33.234 + * @param offset byte offset to start printing at
  33.235 + * @param n number of bytes to print (zero or negative prints nothing)
  33.236 + */
  33.237 +void skb_print_bits(struct sk_buff *skb, int offset, int n){
  33.238 +    int chunk = 16;
  33.239 +    int i, k;
  33.240 +    u8 buff[chunk];
  33.241 +    if(!DEBUG_SKB) return;
  33.242 +    while(n > 0){ // Guard against negative n, which would become a negative copy length.
  33.243 +        k = (n > chunk ? chunk : n);
  33.244 +        if(skb_copy_bits(skb, offset, buff, k)) break; // Stop rather than print uninitialised bytes if the copy fails.
  33.245 +        printk("%03d ", offset);
  33.246 +        for(i=0; i<k; i++){
  33.247 +            if(i == 8)printk(" "); // Extra gap after the first 8 bytes of a line.
  33.248 +            printk(":%02x", buff[i] & 0xff);
  33.249 +        }
  33.250 +        printk(" \n");
  33.251 +        n -= k;
  33.252 +        offset += k;
  33.253 +    }
  33.254 +}
  33.255 +
  33.256 +/** Print a buffer as hex, 16 bytes per line, each line prefixed with its starting index.
  33.257 + *
  33.258 + * @param buf to print
  33.259 + * @param n number of bytes to print
  33.260 + */
  33.261 +void buf_print(char *buf, int n){
  33.262 +    int i;
  33.263 +    for(i=0; i<n; i++){
  33.264 +        if( i % 16 == 0) printk("\n%04d ", i); // New line with index every 16 bytes.
  33.265 +        else if(i % 8 == 0) printk(" "); // Extra gap in the middle of a line.
  33.266 +        printk(":%02x", buf[i] & 0xff); // Mask so a negative char prints as two hex digits.
  33.267 +    }
  33.268 +    printk(" %04d\n", n); // Trailer: the total byte count.
  33.269 +}
  33.270 +
  33.271 +/** Remove n bytes from the tail of an skb and return the new tail pointer.
  33.272 + * Only tail and len are adjusted (data_len is untouched) and n is not checked against the skb's size.
  33.273 + * @todo fixme: Do we need to handle frags?
  33.274 + */
  33.275 +void *skb_trim_tail(struct sk_buff *skb, int n){
  33.276 +    skb->tail -= n;
  33.277 +    skb->len -= n;
  33.278 +    return skb->tail;
  33.279 +}
  33.280 +
  33.281 +// #define BUG_TRAP(x)
  33.282 +// if(!(x)){ printk("KERNEL: assertion (" #x ") failed at " __FILE__ "(%d)\n", __LINE__); }
  33.283 +
  33.284 +/** Convert a (possibly fragmented) skb into a scatter list. Covers the linear head and frags[] only; frag_list is not walked.
  33.285 + *
  33.286 + * @param skb skb to convert
  33.287 + * @param sg scatterlist to set up
  33.288 + * @param sg_n size of sg on input, number of elements set on output (unchanged on error)
  33.289 + * @param offset offset into data to start at
  33.290 + * @param len number of bytes
  33.291 + * @return 0 on success, -EINVAL if sg has too few entries for the requested range
  33.292 + */
  33.293 +int skb_scatterlist(struct sk_buff *skb, struct scatterlist *sg, int *sg_n,
  33.294 +                    int offset, int len){
  33.295 +    int err = 0;
  33.296 +    int start;		// No. of bytes copied so far (where next copy starts).
  33.297 +    int size;		// Size of the next chunk.
  33.298 +    int end;		// Where the next chunk ends (start + size).
  33.299 +    int copy;		// Number of bytes to copy in one operation.
  33.300 +    int sg_i = 0;	// Index into sg.
  33.301 +    int i;
  33.302 +    
  33.303 +    if(DEBUG_SCATTERLIST){
  33.304 +        dprintf("> offset=%d len=%d (end=%d), skb len=%d,\n",
  33.305 +                offset, len, offset+len, skb->len);
  33.306 +    }
  33.307 +    start = 0;
  33.308 +    size = skb_headlen(skb); // First chunk is the linear head.
  33.309 +    end = start + size;
  33.310 +    copy = end - offset;
  33.311 +    if(copy > 0){
  33.312 +        char *p;
  33.313 +        if(copy > len) copy = len;
  33.314 +        if(sg_i >= *sg_n){
  33.315 +            err = -EINVAL;
  33.316 +            goto exit;
  33.317 +        }
  33.318 +        p = skb->data + offset;
  33.319 +        SET_SCATTER_ADDR(sg[sg_i], NULL);
  33.320 +        sg[sg_i].page = virt_to_page(p); // Describe the head bytes as page + offset within that page.
  33.321 +        sg[sg_i].offset = ((unsigned long)p & ~PAGE_MASK);
  33.322 +        sg[sg_i].length = copy;
  33.323 +        if(DEBUG_SCATTERLIST){
  33.324 +            dprintf("> sg_i=%d .page=%p .offset=%u .length=%d\n",
  33.325 +                    sg_i, sg[sg_i].page, sg[sg_i].offset, sg[sg_i].length);
  33.326 +        }
  33.327 +        sg_i++;
  33.328 +        if((len -= copy) == 0) goto exit;
  33.329 +        offset += copy;
  33.330 +    }
  33.331 +    start = end;
  33.332 +    for (i = 0; i < skb_shinfo(skb)->nr_frags; i++){ // Then one sg entry per page fragment that overlaps the range.
  33.333 +        BUG_TRAP(start <= offset + len);
  33.334 +        size = skb_shinfo(skb)->frags[i].size;
  33.335 +        end = start + size;
  33.336 +        copy = end - offset;
  33.337 +        if(copy > 0){
  33.338 +            skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  33.339 +            if(copy > len) copy = len;
  33.340 +            if(sg_i >= *sg_n){
  33.341 +                err = -EINVAL;
  33.342 +                goto exit;
  33.343 +            }
  33.344 +            SET_SCATTER_ADDR(sg[sg_i], NULL);
  33.345 +            sg[sg_i].page = frag->page;
  33.346 +            sg[sg_i].offset = frag->page_offset + offset - start; // Skip the part of the fragment before 'offset'.
  33.347 +            sg[sg_i].length = copy;
  33.348 +            if(DEBUG_SCATTERLIST){
  33.349 +                dprintf("> sg_i=%d .page=%p .offset=%u .length=%d\n",
  33.350 +                        sg_i, sg[sg_i].page, sg[sg_i].offset, sg[sg_i].length);
  33.351 +            }
  33.352 +            sg_i++;
  33.353 +            if((len -= copy) == 0) goto exit;
  33.354 +            offset += copy;
  33.355 +        }
  33.356 +        start = end;
  33.357 +    }
  33.358 +  exit:
  33.359 +    if(!err) *sg_n = sg_i;
  33.360 +    if(len) wprintf("> len=%d\n", len);
  33.361 +    if(!err && len) BUG(); // Only a range the skb could not cover is fatal; a too-small sg returns -EINVAL instead.
  33.362 +    if(err) dprintf("< err=%d sg_n=%d\n", err, *sg_n);
  33.363 +    return err;
  33.364 +}
  33.365 +
  33.366 +struct arpheader /* ARP header layout including the Ethernet/IPv4 address fields; overlaid on packet bytes by print_skb_data() */
  33.367 +{
  33.368 +	unsigned short	ar_hrd;		/* format of hardware address	*/
  33.369 +	unsigned short	ar_pro;		/* format of protocol address	*/
  33.370 +	unsigned char	ar_hln;		/* length of hardware address	*/
  33.371 +	unsigned char	ar_pln;		/* length of protocol address	*/
  33.372 +	unsigned short	ar_op;		/* ARP opcode (command)		*/
  33.373 +
  33.374 +#if 1
  33.375 +	 /*
  33.376 +	  *	 Ethernet looks like this : This bit is variable sized however...
  33.377 +	  */
  33.378 +	unsigned char		ar_sha[ETH_ALEN];	/* sender hardware address	*/
  33.379 +	unsigned char		ar_sip[4];		/* sender IP address		*/
  33.380 +	unsigned char		ar_tha[ETH_ALEN];	/* target hardware address	*/
  33.381 +	unsigned char		ar_tip[4];		/* target IP address		*/
  33.382 +#endif
  33.383 +
  33.384 +};
  33.385 +
  33.386 +void print_skb_data(char *msg, int count, struct sk_buff *skb, u8 *data, int len)
  33.387 +{
  33.388 +    static int skb_count = 1000000;
  33.389 +    u8 *ptr, *end;
  33.390 +    u32 src_addr, dst_addr;
  33.391 +    // Transport layer header.
  33.392 +    union {
  33.393 +        struct tcphdr  *th;
  33.394 +        struct udphdr  *uh;
  33.395 +        struct icmphdr *icmph;
  33.396 +        struct igmphdr *igmph;
  33.397 +        struct iphdr   *ipiph;
  33.398 +        unsigned char  *raw;
  33.399 +    } h;
  33.400 +    // Network layer header.
  33.401 +    union {
  33.402 +        struct iphdr   *iph;
  33.403 +        struct ipv6hdr *ipv6h;
  33.404 +        struct arpheader  *arph;
  33.405 +        struct ipxhdr  *ipxh;
  33.406 +        unsigned char  *raw;
  33.407 +    } nh;
  33.408 +    // Link layer header.
  33.409 +    union {
  33.410 +        struct ethhdr  *ethernet;
  33.411 +        unsigned char  *raw;
  33.412 +    } mac;
  33.413 +    int protocol;
  33.414 +    if(!count) count = ++skb_count;
  33.415 +    if(!msg) msg = (char *)__FUNCTION__;
  33.416 +    if(!data){
  33.417 +        printk("%s.%d> null data\n", msg, count);
  33.418 +        return;
  33.419 +    }
  33.420 +    ptr = data;
  33.421 +    end = data + len;
  33.422 +    mac.raw = ptr;
  33.423 +    ptr += sizeof(struct ethhdr);
  33.424 +    if(ptr > end){ printk("***MAC:");  goto exit; }
  33.425 +    protocol = ntohs(mac.ethernet->h_proto);
  33.426 +    nh.raw = ptr;
  33.427 +
  33.428 +    printk("%s.%d> type=%d protocol=0x%x\n",
  33.429 +           msg, count, skb->pkt_type, htons(skb->protocol));
  33.430 +    if(1){
  33.431 +        printk("%s.%d> %p mac src=" MACFMT " dst=" MACFMT "\n",
  33.432 +               msg, count, data,
  33.433 +               MAC6TUPLE(mac.ethernet->h_source),
  33.434 +               MAC6TUPLE(mac.ethernet->h_dest));
  33.435 +    }
  33.436 +
  33.437 +    switch(protocol){
  33.438 +    case ETH_P_ARP:
  33.439 +        ptr += sizeof(struct arpheader);
  33.440 +        if(ptr > end){ printk("***ARP:");  goto exit; }
  33.441 +        if(0){
  33.442 +            printk("%s.%d> ARP hrd=%d, pro=%d, hln=%d, pln=%d, op=%d\n",
  33.443 +                   msg, count,
  33.444 +                   nh.arph->ar_hrd, nh.arph->ar_pro, nh.arph->ar_hln,
  33.445 +                   nh.arph->ar_pln, nh.arph->ar_op);
  33.446 +        }
  33.447 +        memcpy(&src_addr, nh.arph->ar_sip, 4);
  33.448 +        src_addr = ntohl(src_addr);
  33.449 +        memcpy(&dst_addr, nh.arph->ar_tip, 4);
  33.450 +        dst_addr = ntohl(dst_addr);
  33.451 +        printk("%s.%d> ARP HW src=" MACFMT " dst=" MACFMT "\n",
  33.452 +               msg, count, MAC6TUPLE(nh.arph->ar_sha), MAC6TUPLE(nh.arph->ar_tha));
  33.453 +        printk("%s.%d> ARP IP src=" IPFMT " dst=" IPFMT "\n",
  33.454 +               msg, count, HIPQUAD(src_addr), HIPQUAD(dst_addr));
  33.455 +        break;
  33.456 +    case ETH_P_IP: {
  33.457 +        u16 src_port, dst_port;
  33.458 +        if(ptr + sizeof(struct iphdr) > end){ printk("***IP:");  goto exit; }
  33.459 +        src_addr = ntohl(nh.iph->saddr);
  33.460 +        dst_addr = ntohl(nh.iph->daddr);
  33.461 +        if(1){
  33.462 +            printk("%s.%d> IP proto=%d src=" IPFMT " dst=" IPFMT "\n",
  33.463 +                   msg, count, nh.iph->protocol,
  33.464 +                   HIPQUAD(src_addr), HIPQUAD(dst_addr));
  33.465 +            printk("%s.%d> IP tot_len=%u len=%d\n",
  33.466 +                   msg, count, nh.iph->tot_len & 0xffff, len - ETH_HLEN);
  33.467 +        }
  33.468 +        ptr += (nh.iph->ihl * 4);
  33.469 +        if(ptr > end){ printk ("***IP: len"); goto exit; }
  33.470 +        h.raw = ptr;
  33.471 +        switch(nh.iph->protocol){
  33.472 +        case IPPROTO_TCP:
  33.473 +            ptr += sizeof(struct tcphdr);
  33.474 +            if(ptr > end){ printk("***TCP:"); goto exit; }
  33.475 +            src_port = ntohs(h.th->source);
  33.476 +            dst_port = ntohs(h.th->dest);
  33.477 +            printk("%s.%d> TCP src=" IPFMT ":%u dst=" IPFMT ":%u\n",
  33.478 +                   msg, count,
  33.479 +                   HIPQUAD(src_addr), src_port,
  33.480 +                   HIPQUAD(dst_addr), dst_port);
  33.481 +            break;
  33.482 +        case IPPROTO_UDP:
  33.483 +            ptr += sizeof(struct udphdr);
  33.484 +            if(ptr > end){ printk("***UDP:"); goto exit; }
  33.485 +            src_port = ntohs(h.uh->source);
  33.486 +            dst_port = ntohs(h.uh->dest);
  33.487 +            printk("%s.%d> UDP src=" IPFMT ":%u dst=" IPFMT ":%u\n",
  33.488 +                   msg, count,
  33.489 +                   HIPQUAD(src_addr), src_port,
  33.490 +                   HIPQUAD(dst_addr), dst_port);
  33.491 +            break;
  33.492 +        default:
  33.493 +            printk("%s.%d> IP %d src=" IPFMT " dst=" IPFMT "\n",
  33.494 +                   msg, count,
  33.495 +                   nh.iph->protocol, HIPQUAD(src_addr), HIPQUAD(dst_addr));
  33.496 +            break;
  33.497 +        }
  33.498 +        break; }
  33.499 +    case ETH_P_IPV6:
  33.500 +        printk("%s.%d> IPv6\n", msg, count);
  33.501 +        break;
  33.502 +    case ETH_P_IPX:
  33.503 +        printk("%s.%d> IPX\n", msg, count);
  33.504 +        break;
  33.505 +    default:
  33.506 +        printk("%s.%d> protocol=%d\n", msg, count, protocol);
  33.507 +        break;
  33.508 +    }
  33.509 +    return;
  33.510 +  exit:
  33.511 +    printk("%s.%d> %s: skb problem\n", msg, count, __FUNCTION__);
  33.512 +    printk("%s.%d> %s: data=%p end=%p(%d) ptr=%p(%d) eth=%d arp=%d ip=%d\n",
  33.513 +           msg, count, __FUNCTION__,
  33.514 +           data, end, end - data, ptr, ptr - data,
  33.515 +           sizeof(struct ethhdr), sizeof(struct arphdr), sizeof(struct iphdr));
  33.516 +    return;
  33.517 +}
  33.518 +
    34.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.2 +++ b/tools/vnet/vnet-module/skb_util.h	Mon Nov 22 16:49:15 2004 +0000
    34.3 @@ -0,0 +1,43 @@
    34.4 +/*
    34.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    34.6 + *
    34.7 + * This program is free software; you can redistribute it and/or modify
    34.8 + * it under the terms of the GNU General Public License as published by the 
    34.9 + * Free Software Foundation; either version 2 of the License, or (at your
   34.10 + * option) any later version.
   34.11 + * 
   34.12 + * This program is distributed in the hope that it will be useful, but
   34.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   34.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   34.15 + * for more details.
   34.16 + *
   34.17 + * You should have received a copy of the GNU General Public License along
   34.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   34.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   34.20 + *
   34.21 + */
   34.22 +#ifndef _VNET_SKB_UTIL_H_
   34.23 +#define _VNET_SKB_UTIL_H_
   34.24 +
   34.25 +struct sk_buff;
   34.26 +struct scatterlist;
   34.27 +
   34.28 +extern int skb_make_room(struct sk_buff **pskb, struct sk_buff *skb, int head_n, int tail_n);
   34.29 +
   34.30 +extern int skb_put_bits(const struct sk_buff *skb, int offset, void *src, int len);
   34.31 +
   34.32 +extern int pskb_put(struct sk_buff *skb, int n);
   34.33 +
   34.34 +extern void skb_print_bits(struct sk_buff *skb, int offset, int n);
   34.35 +
   34.36 +extern void buf_print(char *buf, int n);
   34.37 +
   34.38 +extern void *skb_trim_tail(struct sk_buff *skb, int n);
   34.39 +
   34.40 +extern int skb_scatterlist(struct sk_buff *skb, struct scatterlist *sg,
   34.41 +                           int *sg_n, int offset, int len);
   34.42 +
   34.43 +extern void print_skb_data(char *msg, int count, struct sk_buff *skb, u8 *data, int len);
   34.44 +
   34.45 +
   34.46 +#endif
    35.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.2 +++ b/tools/vnet/vnet-module/tunnel.c	Mon Nov 22 16:49:15 2004 +0000
    35.3 @@ -0,0 +1,228 @@
    35.4 +/*
    35.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    35.6 + *
    35.7 + * This program is free software; you can redistribute it and/or modify
    35.8 + * it under the terms of the GNU General Public License as published by the 
    35.9 + * Free Software Foundation; either version 2 of the License, or (at your
   35.10 + * option) any later version.
   35.11 + * 
   35.12 + * This program is distributed in the hope that it will be useful, but
   35.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   35.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   35.15 + * for more details.
   35.16 + *
   35.17 + * You should have received a copy of the GNU General Public License along
   35.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   35.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   35.20 + *
   35.21 + */
   35.22 +#include <linux/config.h>
   35.23 +#include <linux/module.h>
   35.24 +#include <linux/init.h>
   35.25 +
   35.26 +#include <linux/net.h>
   35.27 +#include <linux/in.h>
   35.28 +#include <linux/inet.h>
   35.29 +#include <linux/netdevice.h>
   35.30 +
   35.31 +#include <net/ip.h>
   35.32 +#include <net/protocol.h>
   35.33 +#include <net/route.h>
   35.34 +#include <linux/skbuff.h>
   35.35 +
   35.36 +#include <tunnel.h>
   35.37 +#include <vnet.h>
   35.38 +#include <varp.h>
   35.39 +#include "hash_table.h"
   35.40 +
   35.41 +#define MODULE_NAME "VNET"
   35.42 +//#define DEBUG 1
   35.43 +#undef DEBUG
   35.44 +#include "debug.h"
   35.45 +
   35.46 +// Print a tunnel followed, recursively, by the tunnels it is based on.
   35.47 +void Tunnel_print(Tunnel *tunnel){
   35.48 +    if(!tunnel){
   35.49 +        // A null tunnel is shown as an empty entry of type "ip".
   35.50 +        printk("Tunnel<%p base=%p ref=%02d type=%s>\n",
   35.51 +               NULL, NULL, 0, "ip");
   35.52 +        return;
   35.53 +    }
   35.54 +    printk("Tunnel<%p base=%p ref=%02d type=%s>\n",
   35.55 +           tunnel, tunnel->base,
   35.56 +           atomic_read(&tunnel->refcount), tunnel->type->name);
   35.57 +    if(tunnel->base){
   35.58 +        Tunnel_print(tunnel->base);
   35.59 +    }
   35.60 +}
   35.61 +
   35.62 +/** Create a tunnel: validate type (before the entry trace dereferences it),
   35.63 + * allocate the tunnel, take a reference on base and call type->open.
   35.64 + * On success *val is the new tunnel with one reference; on error *val is NULL. */
   35.65 +int Tunnel_create(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **val){
   35.66 +    int err = 0;
   35.67 +    Tunnel *tunnel = NULL;
   35.68 +    if(!type || !type->open || !type->send || !type->close){
   35.69 +        err = -EINVAL;
   35.70 +        goto exit;
   35.71 +    }
   35.72 +    dprintf("> type=%s vnet=%d addr=" IPFMT " base=%s\n",
   35.73 +            type->name, vnet, NIPQUAD(addr), (base ? base->type->name : "ip"));
   35.74 +    tunnel = kmalloc(sizeof(Tunnel), GFP_ATOMIC);
   35.75 +    if(!tunnel){
   35.76 +        err = -ENOMEM;
   35.77 +        goto exit;
   35.78 +    }
   35.79 +    atomic_set(&tunnel->refcount, 1);
   35.80 +    tunnel->key.vnet = vnet;
   35.81 +    tunnel->key.addr = addr;
   35.82 +    tunnel->type = type;
   35.83 +    tunnel->data = NULL;
   35.84 +    tunnel->send_stats = (TunnelStats){};
   35.85 +    Tunnel_incref(base);
   35.86 +    tunnel->base = base;
   35.87 +    err = type->open(tunnel);
   35.88 +  exit:
   35.89 +    if(err && tunnel){ Tunnel_decref(tunnel); tunnel = NULL; } // open failed: drop the only reference
   35.90 +    *val = tunnel;
   35.91 +    dprintf("< err=%d\n", err);
   35.92 +    return err;
   35.93 +}
   35.94 +
   35.95 +// Create a tunnel and add it to the tunnel table; on failure *tunnel is NULL.
   35.96 +int Tunnel_open(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnel){
   35.97 +    int rc;
   35.98 +    dprintf(">\n");
   35.99 +    rc = Tunnel_create(type, vnet, addr, base, tunnel);
  35.100 +    if(rc == 0){
  35.101 +        rc = Tunnel_add(*tunnel);
  35.102 +    }
  35.103 +    if(rc != 0){
  35.104 +        Tunnel_decref(*tunnel);
  35.105 +        *tunnel = NULL;
  35.106 +    }
  35.107 +    dprintf("< err=%d\n", rc);
  35.108 +    return rc;
  35.109 +}
  35.110 +
  35.111 +// Account one packet of len bytes in stats: in the dropped counters when
  35.112 +// err is nonzero, in the normal counters otherwise.
  35.113 +void TunnelStats_update(TunnelStats *stats, int len, int err){
  35.114 +    int *n_bytes = (err ? &stats->dropped_bytes : &stats->bytes);
  35.115 +    int *n_packets = (err ? &stats->dropped_packets : &stats->packets);
  35.116 +
  35.117 +    dprintf(">len=%d  err=%d\n", len, err);
  35.118 +    *n_bytes += len;
  35.119 +    (*n_packets)++;
  35.120 +    dprintf("<\n");
  35.121 +}
  35.122 +
  35.123 +/** Table of tunnels, indexed by vnet and addr. */
  35.124 +HashTable *tunnel_table = NULL;
  35.125 +
  35.126 +// Hash function for the tunnel table: k is a TunnelKey, hashed on vnet and addr.
  35.127 +static inline Hashcode tunnel_table_key_hash_fn(void *k){
  35.128 +    const TunnelKey *tkey = k;
  35.129 +
  35.130 +    return hash_2ul(tkey->vnet, tkey->addr);
  35.131 +}
  35.132 +
  35.133 +// Key equality for the tunnel table: true when vnet and addr both match.
  35.134 +static int tunnel_table_key_equal_fn(void *k1, void *k2){
  35.135 +    const TunnelKey *a = k1, *b = k2;
  35.136 +    if(a->vnet != b->vnet) return 0;
  35.137 +    return (a->addr == b->addr);
  35.138 +}
  35.139 +
  35.140 +// Entry destructor for the tunnel table: drops a reference on the tunnel
  35.141 +// held as the entry's value, then frees the entry.
  35.142 +static void tunnel_table_entry_free_fn(HashTable *table, HTEntry *entry){
  35.143 +    if(entry){
  35.144 +        Tunnel_decref(entry->value);
  35.145 +        HTEntry_free(entry);
  35.146 +    }
  35.147 +}
  35.148 +
  35.149 +/** Create the tunnel table and install its hash, equality and free callbacks.
  35.150 + * @return 0 on success, -ENOMEM if the table could not be created */
  35.151 +int Tunnel_init(void){
  35.152 +    int rc = -ENOMEM;
  35.153 +    dprintf(">\n");
  35.154 +    tunnel_table = HashTable_new(0);
  35.155 +    if(tunnel_table){
  35.156 +        tunnel_table->key_hash_fn = tunnel_table_key_hash_fn;
  35.157 +        tunnel_table->key_equal_fn = tunnel_table_key_equal_fn;
  35.158 +        tunnel_table->entry_free_fn = tunnel_table_entry_free_fn;
  35.159 +        rc = 0;
  35.160 +    }
  35.161 +    dprintf("< err=%d\n", rc);
  35.162 +    return rc;
  35.163 +}
  35.164 +    
  35.165 +/** Find the tunnel for a vnet and destination address, taking a reference on it.
  35.166 + *
  35.167 + * @param vnet vnet
  35.168 + * @param addr destination address
  35.169 + * @return tunnel state or NULL
  35.170 + */
  35.171 +Tunnel * Tunnel_lookup(u32 vnet, u32 addr){
  35.172 +    TunnelKey lookup_key = { .addr = addr, .vnet = vnet };
  35.173 +    Tunnel *found;
  35.174 +    dprintf(">\n");
  35.175 +    found = HashTable_get(tunnel_table, &lookup_key);
  35.176 +    Tunnel_incref(found);
  35.177 +    dprintf("< tunnel=%p\n", found);
  35.178 +    return found;
  35.179 +}
  35.180 +
  35.181 +// Add a tunnel to the tunnel table, taking a reference if the insert succeeds.
  35.182 +int Tunnel_add(Tunnel *tunnel){
  35.183 +    int rc = -ENOMEM;
  35.184 +    dprintf(">\n");
  35.185 +    if(HashTable_add(tunnel_table, tunnel, tunnel)){
  35.186 +        Tunnel_incref(tunnel);
  35.187 +        rc = 0;
  35.188 +    }
  35.189 +    dprintf("< err=%d\n", rc);
  35.190 +    return rc;
  35.191 +}
  35.192 +
  35.193 +int Tunnel_del(Tunnel *tunnel){ // Remove the tunnel from tunnel_table.
  35.194 +    return HashTable_remove(tunnel_table, tunnel); // HashTable_remove() result is returned as is
  35.195 +}
  35.196 +
  35.197 +/** Do tunnel send processing on a packet. With a tunnel, the tunnel type's
  35.198 + * send function is called and the send statistics are updated; without one
  35.199 + * the packet is transmitted directly on the device named DEVICE.
  35.200 + * @param tunnel tunnel state (may be null)
  35.201 + * @param skb packet
  35.202 + * @return 0 on success, error code otherwise
  35.203 + */
  35.204 +int Tunnel_send(Tunnel *tunnel, struct sk_buff *skb){
  35.205 +    int rc;
  35.206 +    int nbytes;
  35.207 +    dprintf("> tunnel=%p skb=%p\n", tunnel, skb);
  35.208 +    nbytes = skb->len; // read now: skb may have been freed once it is sent
  35.209 +    if(!tunnel){
  35.210 +        struct net_device *dev = NULL;
  35.211 +        rc = vnet_get_device(DEVICE, &dev);
  35.212 +        if(rc == 0){
  35.213 +            skb->dev = dev;
  35.214 +            rc = skb_xmit(skb);
  35.215 +            dev_put(dev);
  35.216 +        }
  35.217 +    } else {
  35.218 +        dprintf("> type=%s type->send...\n", tunnel->type->name);
  35.219 +        rc = tunnel->type->send(tunnel, skb);
  35.220 +        TunnelStats_update(&tunnel->send_stats, nbytes, rc);
  35.221 +    }
  35.222 +    dprintf("< err=%d\n", rc);
  35.223 +    return rc;
  35.224 +}
  35.225 +
  35.226 +int __init tunnel_module_init(void){ // Init hook: creates the tunnel table via Tunnel_init().
  35.227 +    return Tunnel_init();
  35.228 +}
  35.229 +
  35.230 +void __exit tunnel_module_exit(void){ // Exit hook: currently does nothing.
  35.231 +} // NOTE(review): tunnel_table created by Tunnel_init() is not released here - confirm intended
    36.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.2 +++ b/tools/vnet/vnet-module/tunnel.h	Mon Nov 22 16:49:15 2004 +0000
    36.3 @@ -0,0 +1,101 @@
    36.4 +/*
    36.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    36.6 + *
    36.7 + * This program is free software; you can redistribute it and/or modify
    36.8 + * it under the terms of the GNU General Public License as published by the 
    36.9 + * Free Software Foundation; either version 2 of the License, or (at your
   36.10 + * option) any later version.
   36.11 + * 
   36.12 + * This program is distributed in the hope that it will be useful, but
   36.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   36.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   36.15 + * for more details.
   36.16 + *
   36.17 + * You should have received a copy of the GNU General Public License along
   36.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   36.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   36.20 + *
   36.21 + */
   36.22 +#ifndef __VNET_TUNNEL_H__
   36.23 +#define __VNET_TUNNEL_H__
   36.24 +
   36.25 +#include <linux/types.h>
   36.26 +#include <linux/slab.h>
   36.27 +#include <asm/atomic.h>
   36.28 +
   36.29 +struct sk_buff;
   36.30 +struct Tunnel;
   36.31 +
   36.32 +typedef struct TunnelType { // Name and operations for one kind of tunnel.
   36.33 +    const char *name; // printed by Tunnel_print() and in debug traces
   36.34 +    int (*open)(struct Tunnel *tunnel); // called by Tunnel_create() on the new tunnel
   36.35 +    int (*send)(struct Tunnel *tunnel, struct sk_buff *skb); // called by Tunnel_send()
   36.36 +    void (*close)(struct Tunnel *tunnel); // called by Tunnel_decref() on the last reference
   36.37 +} TunnelType;
   36.38 +
   36.39 +typedef struct TunnelStats { // Counters maintained by TunnelStats_update().
   36.40 +    int bytes; // bytes of packets reported with err == 0
   36.41 +    int packets; // packets reported with err == 0
   36.42 +    int dropped_bytes; // bytes of packets reported with err != 0
   36.43 +    int dropped_packets; // packets reported with err != 0
   36.44 +} TunnelStats;
   36.45 +
   36.46 +typedef struct TunnelKey { // Key of the tunnel table; hashed and compared on both fields.
   36.47 +    u32 vnet; // vnet id
   36.48 +    u32 addr; // address (the destination address in Tunnel_lookup())
   36.49 +} TunnelKey;
   36.50 +
   36.51 +typedef struct Tunnel {
   36.52 +    /** Key identifying the tunnel. Must be first. */
   36.53 +    struct TunnelKey key;
   36.54 +    /** Reference count. */
   36.55 +    atomic_t refcount;
   36.56 +    /** Tunnel type. */
   36.57 +    struct TunnelType *type;
   36.58 +    /** Statistics. */
   36.59 +    struct TunnelStats send_stats;
   36.60 +    /** Type-dependent state. */
   36.61 +    void *data;
   36.62 +    /** Underlying tunnel (may be null). */
   36.63 +    struct Tunnel *base;
   36.64 +} Tunnel;
   36.65 +
   36.66 +extern void Tunnel_print(Tunnel *tunnel);
   36.67 +
   36.68 +/** Drop a reference; when it was the last one, close and free the tunnel.
   36.69 + * @param tunnel tunnel (may be null)
   36.70 + */
   36.71 +static inline void Tunnel_decref(Tunnel *tunnel){
   36.72 +    if(tunnel == NULL || !atomic_dec_and_test(&tunnel->refcount)){
   36.73 +        return;
   36.74 +    }
   36.75 +    // Last reference gone: log, close, release the base tunnel, then free.
   36.76 +    printk("%s> Closing tunnel:\n", __FUNCTION__);
   36.77 +    Tunnel_print(tunnel);
   36.78 +    tunnel->type->close(tunnel);
   36.79 +    Tunnel_decref(tunnel->base);
   36.80 +    kfree(tunnel);
   36.81 +}
   36.82 +
   36.83 +/** Take a reference on a tunnel.
   36.84 + * @param tunnel tunnel (may be null, in which case nothing is done)
   36.85 + */
   36.86 +static inline void Tunnel_incref(Tunnel *tunnel){
   36.87 +    if(tunnel != NULL){
   36.88 +        atomic_inc(&tunnel->refcount);
   36.89 +    }
   36.90 +}
   36.91 +
   36.92 +extern int Tunnel_init(void);
   36.93 +extern Tunnel * Tunnel_lookup(u32 vnet, u32 addr);
   36.94 +extern int Tunnel_add(Tunnel *tunnel);
   36.95 +extern int Tunnel_del(Tunnel *tunnel);
   36.96 +extern int Tunnel_send(Tunnel *tunnel, struct sk_buff *skb);
   36.97 +
   36.98 +extern int Tunnel_create(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnelp);
   36.99 +extern int Tunnel_open(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnelp);
  36.100 +
  36.101 +extern int tunnel_module_init(void);
  36.102 +extern void tunnel_module_exit(void);
  36.103 +
  36.104 +#endif /* !__VNET_TUNNEL_H__ */
    37.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.2 +++ b/tools/vnet/vnet-module/varp.c	Mon Nov 22 16:49:15 2004 +0000
    37.3 @@ -0,0 +1,1236 @@
    37.4 +/*
    37.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    37.6 + *
    37.7 + * This program is free software; you can redistribute it and/or modify
    37.8 + * it under the terms of the GNU General Public License as published by the 
    37.9 + * Free Software Foundation; either version 2 of the License, or (at your
   37.10 + * option) any later version.
   37.11 + * 
   37.12 + * This program is distributed in the hope that it will be useful, but
   37.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   37.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   37.15 + * for more details.
   37.16 + *
   37.17 + * You should have received a copy of the GNU General Public License along
   37.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   37.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   37.20 + *
   37.21 + */
   37.22 +
   37.23 +#include <linux/config.h>
   37.24 +#include <linux/kernel.h>
   37.25 +#include <linux/module.h>
   37.26 +#include <linux/init.h>
   37.27 +#include <linux/string.h>
   37.28 +#include <linux/version.h>
   37.29 +
   37.30 +#include <linux/net.h>
   37.31 +#include <linux/in.h>
   37.32 +#include <linux/inet.h>
   37.33 +#include <linux/netdevice.h>
   37.34 +#include <linux/udp.h>
   37.35 +
   37.36 +#include <net/ip.h>
   37.37 +#include <net/protocol.h>
   37.38 +#include <net/route.h>
   37.39 +#include <linux/skbuff.h>
   37.40 +#include <linux/spinlock.h>
   37.41 +#include <asm/semaphore.h>
   37.42 +
   37.43 +#include <tunnel.h>
   37.44 +#include <vnet.h>
   37.45 +#include <vif.h>
   37.46 +#include <varp.h>
   37.47 +#include <if_varp.h>
   37.48 +
   37.49 +#include "allocate.h"
   37.50 +#include "hash_table.h"
   37.51 +#include "sys_net.h"
   37.52 +#include "sys_string.h"
   37.53 +
   37.54 +#define MODULE_NAME "VARP"
   37.55 +//#define DEBUG 1
   37.56 +#undef DEBUG
   37.57 +#include "debug.h"
   37.58 +
   37.59 +
   37.60 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   37.61 +// The 'ethernet' field in the skb->mac union went away.
   37.62 +#define MAC_ETH(_skb) ((struct ethhdr *)(_skb)->mac.raw)
   37.63 +#else
   37.64 +#define MAC_ETH(_skb) ((_skb)->mac.ethernet)
   37.65 +#endif
   37.66 +
   37.67 +/** @file VARP: Virtual ARP.
   37.68 + *
   37.69 + * Handles virtual ARP requests for vnet/vmac.
   37.70 + */
   37.71 +
   37.72 +/*
   37.73 +
   37.74 +Varp uses UDP on port 1798.
   37.75 +
   37.76 +on domain up: ?
   37.77 +  send varp.announce { id, vmac, vnet, coa } for each vif
   37.78 +  that hasn't announced before, or has changed.
   37.79 +  install vif entries in local table.
   37.80 +
   37.81 +on varp.announce{ id, vmac, vnet, coa }:
   37.82 +  update VARP entry for vmac x vnet if have one, reset ttl.
   37.83 +
   37.84 +on varp.request { id, vmac, vnet }:
   37.85 +  if have a vif for the requested vmac/vnet,
   37.86 +  reply with varp.announce{ id, vmac, vnet, coa }
   37.87 +
   37.88 +on timer:
   37.89 +  traverse VARP table, flush old entries.
   37.90 +
   37.91 +on probe timer:
   37.92 +  probe again if not out of tries.
   37.93 +  if out of tries invalidate entry.
   37.94 +
   37.95 +*/
   37.96 +
   37.97 +/** Time-to-live of varp entries (in jiffies).*/
   37.98 +#define VARP_ENTRY_TTL      (60*HZ)
   37.99 +
  37.100 +/** Maximum number of varp probes to make. */
  37.101 +#define VARP_PROBE_MAX      5
  37.102 +
  37.103 +/** Interval between varp probes (in jiffies). */
  37.104 +#define VARP_PROBE_INTERVAL (3*HZ)
  37.105 +
  37.106 +/** Maximum number of queued skbs for a varp entry. */
  37.107 +#define VARP_QUEUE_MAX      16
  37.108 +
  37.109 +/** Number of buckets in the varp table (must be prime). */
  37.110 +#define VARP_TABLE_BUCKETS  3001
  37.111 +
  37.112 +/** Varp entry states. */
  37.113 +enum {
  37.114 +    VARP_STATE_INCOMPLETE = 1,
  37.115 +    VARP_STATE_REACHABLE = 2,
  37.116 +    VARP_STATE_FAILED = 3
  37.117 +};
  37.118 +
  37.119 +/** Varp entry flags. */
  37.120 +enum {
  37.121 +    VARP_FLAG_PROBING = 1,
  37.122 +    VARP_FLAG_PERMANENT = 2,
  37.123 +};
  37.124 +
  37.125 +/** Key for varp entries. */
  37.126 +typedef struct VarpKey {
  37.127 +    /** Vnet id (host order). */
  37.128 +    u32 vnet;
  37.129 +    /** Virtual MAC address. */
  37.130 +    Vmac vmac;
  37.131 +} VarpKey;
  37.132 +
  37.133 +/** An entry in the varp cache. */
  37.134 +typedef struct VarpEntry {
  37.135 +    /** Key for the entry. */
  37.136 +    VarpKey key;
  37.137 +    /** Care-of address for the key. */
  37.138 +    u32 addr;
  37.139 +    /** Last-updated timestamp. */
  37.140 +    unsigned long timestamp;
  37.141 +    /** State. */
  37.142 +    short state;
  37.143 +    /** Flags. */
  37.144 +    short flags;
  37.145 +    /** Reference count. */
  37.146 +    atomic_t refcount;
  37.147 +    /** Lock. */
  37.148 +    rwlock_t lock;
  37.149 +    /** How many probes have been made. */
  37.150 +    atomic_t probes;
  37.151 +    /** Probe timer. */
  37.152 +    struct timer_list timer;
  37.153 +    void (*error)(struct VarpEntry *ventry, struct sk_buff *skb);
  37.154 +    /** Outbound skb queue. */
  37.155 +    struct sk_buff_head queue;
  37.156 +    /** Maximum size of the queue. */
  37.157 +    int queue_max;
  37.158 +
  37.159 +    int locks;
  37.160 +} VarpEntry;
  37.161 +
  37.162 +/** The varp cache. Varp entries indexed by VarpKey. */
  37.163 +typedef struct VarpTable {
  37.164 +
  37.165 +    HashTable *table;
  37.166 +
  37.167 +    /** Sweep timer. */
  37.168 +    struct timer_list timer;
  37.169 +
  37.170 +    /** Lock. Need to use a semaphore instead of a spinlock because
  37.171 +     * some operations under the varp table lock can schedule - and
  37.172 +     * you mustn't hold a spinlock when scheduling.
  37.173 +     */
  37.174 +    struct semaphore lock;
  37.175 +
  37.176 +} VarpTable;
  37.177 +
  37.178 +/** The varp cache. */
  37.179 +static VarpTable *varp_table = NULL;
  37.180 +
  37.181 +/** Module parameter for the multicast address. */
  37.182 +static char *varp_mcaddr = NULL;
  37.183 +
  37.184 +/** Multicast address (network order). */
  37.185 +u32 varp_mcast_addr = 0;
  37.186 +
  37.187 +/** Unicast address (network order). */
  37.188 +u32 varp_ucast_addr = 0;
  37.189 +
  37.190 +/** UDP port (network order). */
  37.191 +u16 varp_port = 0;
  37.192 +
  37.193 +/** Network device to use. */
  37.194 +char *varp_device = DEVICE;
  37.195 +
  37.196 +#define VarpTable_read_lock(z, flags)    do{ (flags) = 0; down(&(z)->lock); } while(0) // read and write variants are the same: one semaphore
  37.197 +#define VarpTable_read_unlock(z, flags)  do{ (flags) = 0; up(&(z)->lock); } while(0) // flags is only zeroed, nothing is saved in it
  37.198 +#define VarpTable_write_lock(z, flags)   do{ (flags) = 0; down(&(z)->lock); } while(0)
  37.199 +#define VarpTable_write_unlock(z, flags) do{ (flags) = 0; up(&(z)->lock); } while(0)
  37.200 +
  37.201 +#define VarpEntry_lock(ventry, flags)    write_lock_irqsave(&(ventry)->lock, (flags))
  37.202 +#define VarpEntry_unlock(ventry, flags)  write_unlock_irqrestore(&(ventry)->lock, (flags))
  37.203 +
  37.204 +void VarpTable_sweep(VarpTable *z, int all);
  37.205 +void VarpTable_print(VarpTable *z);
  37.206 +
  37.207 +/** Print the varp cache with VarpTable_print().
  37.208 + * The call is compiled in only when DEBUG is defined; otherwise the body is empty. */
  37.209 +void varp_dprint(void){
  37.210 +#ifdef DEBUG
  37.211 +    VarpTable_print(varp_table);
  37.212 +#endif
  37.213 +} 
  37.214 +
  37.215 +/** Print at KERN_INFO the varp settings (device, multicast and unicast
  37.216 + * addresses, port in host order), then the varp cache via VarpTable_print(). */
  37.217 +void varp_print(void){
  37.218 +    printk(KERN_INFO "=== VARP ===============================================================\n");
  37.219 +    printk(KERN_INFO "varp_device     %s\n", varp_device);
  37.220 +    printk(KERN_INFO "varp_mcast_addr " IPFMT "\n", NIPQUAD(varp_mcast_addr));
  37.221 +    printk(KERN_INFO "varp_ucast_addr " IPFMT "\n", NIPQUAD(varp_ucast_addr));
  37.222 +    printk(KERN_INFO "varp_port       %d\n", ntohs(varp_port));
  37.223 +    VarpTable_print(varp_table);
  37.224 +    printk(KERN_INFO "========================================================================\n");
  37.225 +}
  37.226 +
  37.227 +/** Lookup a network device by name using dev_get_by_name().
  37.228 + * Callers in this module release the device with dev_put() when done.
  37.229 + *
  37.230 + * @param name device name
  37.231 + * @param dev return parameter for the device (NULL if not found)
  37.232 + * @return 0 on success, -ENETDOWN if no device was found
  37.233 + */
  37.234 +int vnet_get_device(const char *name, struct net_device **dev){
  37.235 +    struct net_device *found = dev_get_by_name(name);
  37.236 +
  37.237 +    *dev = found;
  37.238 +    if(!found) return -ENETDOWN;
  37.239 +    return 0;
  37.240 +}
  37.241 +
  37.242 +/** Get the source address from a device (first entry of its address list).
  37.243 + *
  37.244 + * @param dev device
  37.245 + * @param addr return parameter for address (not written on error)
  37.246 + * @return 0 on success, -EIO if no in_device, -EADDRNOTAVAIL if no address
  37.247 + */
  37.248 +int vnet_get_device_address(struct net_device *dev, u32 *addr){
  37.249 +    int err = -EIO;
  37.250 +    struct in_device *in_dev = in_dev_get(dev);
  37.251 +
  37.252 +    if(!in_dev) goto exit;
  37.253 +    // ifa_list is NULL when the device has no address: don't dereference it.
  37.254 +    if(in_dev->ifa_list){
  37.255 +        *addr = in_dev->ifa_list->ifa_address;
  37.256 +        err = 0;
  37.257 +    } else {
  37.258 +        err = -EADDRNOTAVAIL;
  37.259 +    }
  37.260 +    in_dev_put(in_dev);
  37.261 +  exit:
  37.262 +    return err;
  37.263 +}
  37.264 +
  37.265 +#ifndef LL_RESERVED_SPACE // fallback, used only when the kernel headers do not define it
  37.266 +#define HH_DATA_MOD	16
  37.267 +#define LL_RESERVED_SPACE(dev) \
  37.268 +        (((dev)->hard_header_len & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
  37.269 +#endif // the argument is parenthesized so any pointer expression can be passed
  37.270 +
  37.271 +/** Send a varp protocol message.
  37.272 + * Replies to skbin when one is given; otherwise sends to varp_mcast_addr,
  37.273 + * or to the device's broadcast address if varp_mcast_addr is not multicast.
  37.274 + * @param opcode varp opcode (host order)
  37.275 + * @param dev device (may be null: varp_device is then looked up by name)
  37.276 + * @param skbin skb being replied to (may be null)
  37.277 + * @param vnet vnet id (in host order)
  37.278 + * @param vmac vmac (in network order)
  37.279 + * @return 0 on success, error code otherwise
  37.280 + */
  37.281 +int varp_send(u16 opcode, struct net_device *dev, struct sk_buff *skbin,
  37.282 +              u32 vnet, Vmac *vmac){
  37.283 +    int err = 0;
  37.284 +    int dev_held = 0; // set when dev was looked up here and must be released
  37.285 +    int link_n = 0;
  37.286 +    int ip_n = sizeof(struct iphdr);
  37.287 +    int udp_n = sizeof(struct udphdr);
  37.288 +    int varp_n = sizeof(VarpHdr);
  37.289 +    struct sk_buff *skbout = NULL;
  37.290 +    struct in_device *in_dev = NULL;
  37.291 +    VarpHdr *varph = NULL;
  37.292 +    u8 macbuf[6] = {};
  37.293 +    u8 *smac, *dmac;
  37.294 +    u32 saddr, daddr;
  37.295 +    u16 sport, dport;
  37.296 +
  37.297 +    dmac = macbuf;
  37.298 +    dprintf("> opcode=%d vnet=%d vmac=" MACFMT "\n",
  37.299 +            opcode, vnet, MAC6TUPLE(vmac->mac));
  37.300 +    if(!dev){
  37.301 +        //todo: should use routing for daddr to get device.
  37.302 +        err = vnet_get_device(varp_device, &dev);
  37.303 +        if(err) goto exit;
  37.304 +        dev_held = 1;
  37.305 +    }
  37.306 +    link_n = LL_RESERVED_SPACE(dev);
  37.307 +    in_dev = in_dev_get(dev);
  37.308 +    // Without IP state or an address there is no source address: fail, rather
  37.309 +    // than return 0 without sending or dereference a null ifa_list.
  37.310 +    if(!in_dev){ err = -EIO; goto exit; }
  37.311 +    if(!in_dev->ifa_list){ err = -EADDRNOTAVAIL; goto exit; }
  37.312 +    smac = dev->dev_addr;
  37.313 +    saddr = in_dev->ifa_list->ifa_address;
  37.314 +    if(skbin){
  37.315 +        dmac = MAC_ETH(skbin)->h_source;
  37.316 +        sport = skbin->h.uh->dest;
  37.317 +        daddr = skbin->nh.iph->saddr;
  37.318 +        dport = varp_port;
  37.319 +    } else {
  37.320 +        if(MULTICAST(varp_mcast_addr)){
  37.321 +            daddr = varp_mcast_addr;
  37.322 +            ip_eth_mc_map(daddr, dmac);
  37.323 +        } else {
  37.324 +            daddr = in_dev->ifa_list->ifa_broadcast;
  37.325 +            dmac = dev->broadcast;
  37.326 +        }
  37.327 +        sport = varp_port;
  37.328 +        dport = varp_port;
  37.329 +    }
  37.330 +    dprintf("> smac=" MACFMT " dmac=" MACFMT "\n", MAC6TUPLE(smac), MAC6TUPLE(dmac));
  37.331 +    dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n", NIPQUAD(saddr), NIPQUAD(daddr));
  37.332 +    dprintf("> sport=%u dport=%u\n", ntohs(sport), ntohs(dport));
  37.333 +
  37.334 +    skbout = alloc_skb(link_n + ip_n + udp_n + varp_n, GFP_ATOMIC);
  37.335 +    if (!skbout){
  37.336 +        err = -ENOMEM;
  37.337 +        goto exit;
  37.338 +    }
  37.339 +    skbout->dev = dev;
  37.340 +    skb_reserve(skbout, link_n);
  37.341 +    skbout->protocol = htons(ETH_P_IP);
  37.342 +
  37.343 +    // Device header. Pushes device header on front of skb.
  37.344 +    if (dev->hard_header){
  37.345 +        err = dev->hard_header(skbout, dev, ETH_P_IP, dmac, smac, skbout->len);
  37.346 +        if(err < 0) goto exit;
  37.347 +        skbout->mac.raw = skbout->data;
  37.348 +    }
  37.349 +
  37.350 +    // IP header.
  37.351 +    skbout->nh.raw = skb_put(skbout, ip_n);
  37.352 +    skbout->nh.iph->version  = 4;
  37.353 +    skbout->nh.iph->ihl      = ip_n / 4;
  37.354 +    skbout->nh.iph->tos      = 0;
  37.355 +    skbout->nh.iph->tot_len  = htons(ip_n + udp_n + varp_n);
  37.356 +    skbout->nh.iph->id       = 0;
  37.357 +    skbout->nh.iph->frag_off = 0;
  37.358 +    skbout->nh.iph->ttl      = 64;
  37.359 +    skbout->nh.iph->protocol = IPPROTO_UDP;
  37.360 +    skbout->nh.iph->saddr    = saddr;
  37.361 +    skbout->nh.iph->daddr    = daddr;
  37.362 +    skbout->nh.iph->check    = 0;
  37.363 +
  37.364 +    // UDP header.
  37.365 +    skbout->h.raw = skb_put(skbout, udp_n);
  37.366 +    skbout->h.uh->source     = sport;
  37.367 +    skbout->h.uh->dest       = dport;
  37.368 +    skbout->h.uh->len        = htons(udp_n + varp_n);
  37.369 +    skbout->h.uh->check      = 0;
  37.370 +
  37.371 +    // Varp header.
  37.372 +    varph = (void*)skb_put(skbout, varp_n);
  37.373 +    *varph = (VarpHdr){};
  37.374 +    varph->id                = htons(VARP_ID);
  37.375 +    varph->opcode            = htons(opcode);
  37.376 +    varph->vnet              = htonl(vnet);
  37.377 +    varph->vmac              = *vmac;
  37.378 +    varph->addr              = saddr;
  37.379 +    err = skb_xmit(skbout);
  37.380 +  exit:
  37.381 +    if(err && skbout) kfree_skb(skbout);
  37.382 +    if(in_dev) in_dev_put(in_dev); // held until here so ifa_list stays usable above
  37.383 +    if(dev_held) dev_put(dev);     // drop the reference from vnet_get_device()
  37.384 +    dprintf("< err=%d\n", err);
  37.385 +    return err;
  37.386 +}
  37.387 +
  37.388 +/** Send a varp request for the vnet and destination mac of a packet.
  37.389 + *
  37.390 + * @param skb packet
  37.391 + * @param vnet vnet (in host order)
  37.392 + * @return 0 on success, error code otherwise
  37.393 + */
  37.394 +int varp_solicit(struct sk_buff *skb, int vnet){
  37.395 +    int err = 0;
  37.396 +    dprintf("> skb=%p\n", skb);
  37.397 +    varp_dprint();
  37.398 +    err = varp_send(VARP_OP_REQUEST, NULL, NULL,
  37.399 +                    vnet, (Vmac*)MAC_ETH(skb)->h_dest);
  37.400 +    dprintf("< err=%d\n", err);
  37.401 +    return err;
  37.402 +}
  37.403 +
  37.404 +/* Test some flags.
  37.405 + *
  37.406 + * @param z varp entry
  37.407 + * @param flags to test
  37.408 + * @return nonzero if flags set
  37.409 + */
  37.410 +int VarpEntry_get_flags(VarpEntry *z, int flags){
  37.411 +    return z->flags & flags;
  37.412 +}
  37.413 +
  37.414 +/** Set some flags.
  37.415 + *
  37.416 + * @param z varp entry
  37.417 + * @param flags to set
  37.418 + * @param set set flags on if nonzero, off if zero
  37.419 + * @return new flags value
  37.420 + */
  37.421 +int VarpEntry_set_flags(VarpEntry *z, int flags, int set){
  37.422 +    if(set){
  37.423 +        z->flags |= flags;
  37.424 +    } else {
  37.425 +        z->flags &= ~flags;
  37.426 +    }
  37.427 +    return z->flags;
  37.428 +}
  37.429 +
  37.430 +/** Print a varp entry.
  37.431 + *
  37.432 + * @param ventry varp entry
  37.433 + */
  37.434 +void VarpEntry_print(VarpEntry *ventry){
  37.435 +    if(ventry){
  37.436 +        char *c, *d;
  37.437 +        switch(ventry->state){
  37.438 +        case VARP_STATE_INCOMPLETE: c = "INC"; break;
  37.439 +        case VARP_STATE_REACHABLE:  c = "RCH"; break;
  37.440 +        case VARP_STATE_FAILED:     c = "FLD"; break;
  37.441 +        default:                    c = "UNK"; break;
  37.442 +        }
  37.443 +        d = (VarpEntry_get_flags(ventry, VARP_FLAG_PROBING) ? "P" : " ");
  37.444 +
  37.445 +        printk(KERN_INFO "VENTRY(%p ref=%1d %s %s vnet=%d vmac=" MACFMT " addr=" IPFMT " q=%d t=%lu)\n",
  37.446 +               ventry,
  37.447 +               atomic_read(&ventry->refcount),
  37.448 +               c, d,
  37.449 +               ventry->key.vnet,
  37.450 +               MAC6TUPLE(ventry->key.vmac.mac),
  37.451 +               NIPQUAD(ventry->addr),
  37.452 +               skb_queue_len(&ventry->queue),
  37.453 +               ventry->timestamp);
  37.454 +    } else {
  37.455 +        printk("VENTRY: Null!\n");
  37.456 +    }
  37.457 +}
  37.458 +
  37.459 +/** Free a varp entry.
  37.460 + *
  37.461 + * @param z varp entry
  37.462 + */
  37.463 +void VarpEntry_free(VarpEntry *z){
  37.464 +    if(!z) return;
  37.465 +    deallocate(z);
  37.466 +}
  37.467 +
  37.468 +/** Increment reference count.
  37.469 + *
  37.470 + * @param z varp entry (may be null)
  37.471 + */
  37.472 +void VarpEntry_incref(VarpEntry *z){
  37.473 +    if(!z) return;
  37.474 +    atomic_inc(&z->refcount);
  37.475 +    //dprintf("> "); VarpEntry_print(z);
  37.476 +}
  37.477 +
  37.478 +/** Decrement reference count, freeing if zero.
  37.479 + *
  37.480 + * @param z varp entry (may be null)
  37.481 + */
  37.482 +void VarpEntry_decref(VarpEntry *z){
  37.483 +    if(!z) return;
  37.484 +    //dprintf("> "); VarpEntry_print(z);
  37.485 +    if(atomic_dec_and_test(&z->refcount)){
  37.486 +        //dprintf("> freeing %p...\n", z);
  37.487 +        VarpEntry_free(z);
  37.488 +    }
  37.489 +}
  37.490 +
  37.491 +/** Call the error handler.
  37.492 + *
  37.493 + * @param ventry varp entry
  37.494 + */
  37.495 +void VarpEntry_error(VarpEntry *ventry){
  37.496 +    struct sk_buff *skb;
  37.497 +    skb = skb_peek(&ventry->queue);
  37.498 +    if(!skb) return;
  37.499 +    if(ventry->error) ventry->error(ventry, skb);
  37.500 +    skb_queue_purge(&ventry->queue);
  37.501 +}
  37.502 +
  37.503 +/** Schedule the varp entry timer.
  37.504 + * Must increment the reference count before doing
  37.505 + * this the first time, so the ventry won' be freed
  37.506 + * before the timer goes off.
  37.507 + *
  37.508 + * @param ventry varp entry
  37.509 + */
  37.510 +void VarpEntry_schedule(VarpEntry *ventry){
  37.511 +    unsigned long now = jiffies;
  37.512 +    ventry->timer.expires = now + VARP_PROBE_INTERVAL;
  37.513 +    add_timer(&ventry->timer);
  37.514 +}
  37.515 +
  37.516 +/** Function called when a varp entry timer goes off.
  37.517 + * If the entry is still incomplete, carries on probing.
  37.518 + * Otherwise stops probing.
  37.519 + *
  37.520 + * @param arg ventry
  37.521 + */
  37.522 +static void varp_timer_fn(unsigned long arg){
  37.523 +    unsigned long flags;
  37.524 +    VarpEntry *ventry = (VarpEntry *)arg;
  37.525 +    struct sk_buff *skb = NULL;
  37.526 +    int locked = 0, probing = 0;
  37.527 +
  37.528 +    dprintf(">\n"); //VarpEntry_print(ventry);
  37.529 +    VarpEntry_lock(ventry, flags);
  37.530 +    locked = 1;
  37.531 +    if(ventry->state == VARP_STATE_REACHABLE){
  37.532 +        // Do nothing.
  37.533 +    } else {
  37.534 +        // Probe if haven't run out of tries, otherwise fail.
  37.535 +        if(atomic_read(&ventry->probes) < VARP_PROBE_MAX){
  37.536 +            probing = 1;
  37.537 +            VarpEntry_schedule(ventry);
  37.538 +            skb = skb_peek(&ventry->queue);
  37.539 +            if(skb){
  37.540 +                dprintf("> skbs in queue - solicit\n");
  37.541 +                atomic_inc(&ventry->probes);
  37.542 +                VarpEntry_unlock(ventry, flags);
  37.543 +                locked = 0;
  37.544 +                varp_solicit(skb, ventry->key.vnet);
  37.545 +            } else {
  37.546 +                dprintf("> empty queue.\n");
  37.547 +            }
  37.548 +        } else {
  37.549 +            dprintf("> Out of probes: FAILED\n");
  37.550 +            VarpEntry_error(ventry);
  37.551 +            ventry->state = VARP_STATE_FAILED;
  37.552 +        }
  37.553 +    }
  37.554 +    VarpEntry_set_flags(ventry, VARP_FLAG_PROBING, probing);
  37.555 +    if(locked) VarpEntry_unlock(ventry, flags);
  37.556 +    if(!probing) VarpEntry_decref(ventry);
  37.557 +    dprintf("<\n");
  37.558 +}
  37.559 +
  37.560 +/** Default error function for varp entries.
  37.561 + *
  37.562 + * @param ventry varp entry
  37.563 + * @param skb packet dropped because of error
  37.564 + */
  37.565 +static void varp_error_fn(VarpEntry *ventry, struct sk_buff *skb){
  37.566 +}
  37.567 +
  37.568 +/** Create a varp entry. Initializes the internal state.
  37.569 + *
  37.570 + * @param vnet vnet id
  37.571 + * @param vmac virtual MAC address (copied)
  37.572 + * @return ventry or null
  37.573 + */
  37.574 +VarpEntry * VarpEntry_new(u32 vnet, Vmac *vmac){
  37.575 +    VarpEntry *z = ALLOCATE(VarpEntry);
  37.576 +    if(z){
  37.577 +        unsigned long now = jiffies;
  37.578 +
  37.579 +        atomic_set(&z->refcount, 1);
  37.580 +        z->lock = RW_LOCK_UNLOCKED;
  37.581 +        z->state = VARP_STATE_INCOMPLETE;
  37.582 +        z->queue_max = VARP_QUEUE_MAX;
  37.583 +        skb_queue_head_init(&z->queue);
  37.584 +        init_timer(&z->timer);
  37.585 +        z->timer.data = (unsigned long)z;
  37.586 +        z->timer.function = varp_timer_fn;
  37.587 +        z->timestamp = now;
  37.588 +        z->error = varp_error_fn;
  37.589 +
  37.590 +        z->key.vnet = vnet;
  37.591 +        z->key.vmac = *vmac;
  37.592 +    }
  37.593 +    return z;
  37.594 +}
  37.595 +
  37.596 +/** Hash function for keys in the varp cache.
  37.597 + * Hashes the vnet id and mac.
  37.598 + *
  37.599 + * @param k key (VarpKey)
  37.600 + * @return hashcode
  37.601 + */
  37.602 +Hashcode varp_key_hash_fn(void *k){
  37.603 +    VarpKey *key = k;
  37.604 +    Hashcode h;
  37.605 +    h = hash_2ul(key->vnet,
  37.606 +                 (key->vmac.mac[0] << 24) |
  37.607 +                 (key->vmac.mac[1] << 16) |
  37.608 +                 (key->vmac.mac[2] <<  8) |
  37.609 +                 (key->vmac.mac[3]      ));
  37.610 +    h = hash_hul(h, 
  37.611 +                 (key->vmac.mac[4] <<   8) |
  37.612 +                 (key->vmac.mac[5]       ));
  37.613 +    return h;
  37.614 +}
  37.615 +
  37.616 +/** Test equality for keys in the varp cache.
  37.617 + * Compares vnet and mac.
  37.618 + *
  37.619 + * @param k1 key to compare (VarpKey)
  37.620 + * @param k2 key to compare (VarpKey)
  37.621 + * @return 1 if equal, 0 otherwise
  37.622 + */
  37.623 +int varp_key_equal_fn(void *k1, void *k2){
  37.624 +    VarpKey *key1 = k1;
  37.625 +    VarpKey *key2 = k2;
  37.626 +    return (key1->vnet == key2->vnet)
  37.627 +        && (memcmp(key1->vmac.mac, key2->vmac.mac, ETH_ALEN) == 0);
  37.628 +}
  37.629 +
  37.630 +/** Free an entry in the varp cache.
  37.631 + *
  37.632 + * @param table containing table
  37.633 + * @param entry entry to free
  37.634 + */
  37.635 +static void varp_entry_free_fn(HashTable *table, HTEntry *entry){
  37.636 +    VarpEntry *ventry;
  37.637 +    if(!entry) return;
  37.638 +    ventry = entry->value;
  37.639 +    if(ventry) VarpEntry_decref(ventry);
  37.640 +    HTEntry_free(entry);
  37.641 +}
  37.642 +
  37.643 +/** Free the whole varp cache.
  37.644 + * Dangerous.
  37.645 + *
  37.646 + * @param z varp cache
  37.647 + */
  37.648 +void VarpTable_free(VarpTable *z){
  37.649 +    unsigned long flags;
  37.650 +    if(!z) return;
  37.651 +    VarpTable_write_lock(z, flags);
  37.652 +    del_timer(&z->timer);
  37.653 +    z->timer.data = 0;
  37.654 +    if(z->table) HashTable_free(z->table); 
  37.655 +    VarpTable_write_unlock(z, flags);
  37.656 +    deallocate(z);
  37.657 +}
  37.658 +
  37.659 +/** Schedule the varp table timer.
  37.660 + *
  37.661 + * @param z varp table
  37.662 + */
  37.663 +void VarpTable_schedule(VarpTable *z){
  37.664 +    unsigned long now = jiffies;
  37.665 +    z->timer.expires = now + VARP_ENTRY_TTL;
  37.666 +    add_timer(&z->timer);
  37.667 +}
  37.668 +
  37.669 +/** Function called when the varp table timer goes off.
  37.670 + * Sweeps old varp cache entries and reschedules itself.
  37.671 + *
  37.672 + * @param arg varp table
  37.673 + */
  37.674 +static void varp_table_timer_fn(unsigned long arg){
  37.675 +    VarpTable *z = (VarpTable *)arg;
  37.676 +    //dprintf("> z=%p\n", z);
  37.677 +    if(z){
  37.678 +        VarpTable_sweep(z, 0);
  37.679 +        VarpTable_schedule(z);
  37.680 +    }
  37.681 +    //dprintf("<\n");
  37.682 +}
  37.683 +
  37.684 +/** Print a varp table.
  37.685 + *
  37.686 + * @param z table
  37.687 + */
  37.688 +void VarpTable_print(VarpTable *z){
  37.689 +    HashTable_for_decl(entry);
  37.690 +    VarpEntry *ventry;
  37.691 +    unsigned long flags, vflags;
  37.692 +
  37.693 +    //dprintf(">\n");
  37.694 +    VarpTable_read_lock(z, flags);
  37.695 +    HashTable_for_each(entry, varp_table->table){
  37.696 +        ventry = entry->value;
  37.697 +        VarpEntry_lock(ventry, vflags);
  37.698 +        VarpEntry_print(ventry);
  37.699 +        VarpEntry_unlock(ventry, vflags);
  37.700 +    }
  37.701 +    VarpTable_read_unlock(z, flags);
  37.702 +    //dprintf("<\n");
  37.703 +}
  37.704 +
  37.705 +/** Create a varp table.
  37.706 + *
  37.707 + * @return new table or null
  37.708 + */
  37.709 +VarpTable * VarpTable_new(void){
  37.710 +    int err = -ENOMEM;
  37.711 +    VarpTable *z = NULL;
  37.712 +
  37.713 +    z = ALLOCATE(VarpTable);
  37.714 +    if(!z) goto exit;
  37.715 +    z->table = HashTable_new(VARP_TABLE_BUCKETS);
  37.716 +    if(!z->table) goto exit;
  37.717 +    z->table->key_equal_fn = varp_key_equal_fn;
  37.718 +    z->table->key_hash_fn = varp_key_hash_fn;
  37.719 +    z->table->entry_free_fn = varp_entry_free_fn;
  37.720 +    init_MUTEX(&z->lock);
  37.721 +    init_timer(&z->timer);
  37.722 +    z->timer.data = (unsigned long)z;
  37.723 +    z->timer.function = varp_table_timer_fn;
  37.724 +    VarpTable_schedule(z);
  37.725 +    err = 0;
  37.726 +  exit:
  37.727 +    if(err){
  37.728 +        VarpTable_free(z);
  37.729 +        z = NULL;
  37.730 +    }
  37.731 +    return z;
  37.732 +}
  37.733 +
  37.734 +/** Add a new entry to the varp table.
  37.735 + *
  37.736 + * @param z table
  37.737 + * @param vnet vnet id
  37.738 + * @param vmac virtual MAC address (copied)
  37.739 + * @return new entry or null
  37.740 + */
  37.741 +VarpEntry * VarpTable_add(VarpTable *z, u32 vnet, Vmac *vmac){
  37.742 +    int err = -ENOMEM;
  37.743 +    VarpEntry *ventry;
  37.744 +    HTEntry *entry;
  37.745 +    unsigned long flags;
  37.746 +
  37.747 +    ventry = VarpEntry_new(vnet, vmac);
  37.748 +    if(!ventry) goto exit;
  37.749 +    //dprintf("> "); VarpEntry_print(ventry);
  37.750 +    VarpTable_write_lock(z, flags);
  37.751 +    entry = HashTable_add(z->table, ventry, ventry);
  37.752 +    VarpTable_write_unlock(z, flags);
  37.753 +    if(!entry) goto exit;
  37.754 +    VarpEntry_incref(ventry);
  37.755 +    err = 0;
  37.756 +  exit:
  37.757 +    if(err){
  37.758 +        VarpEntry_free(ventry);
  37.759 +        ventry = NULL;
  37.760 +    }
  37.761 +    return ventry;
  37.762 +}
  37.763 +
  37.764 +/** Remove an entry from the varp table.
  37.765 + *
  37.766 + * @param z table
  37.767 + * @param ventry entry to remove
  37.768 + * @return removed count
  37.769 + */
  37.770 +int VarpTable_remove(VarpTable *z, VarpEntry *ventry){
  37.771 +    return HashTable_remove(z->table, ventry);
  37.772 +}
  37.773 +
  37.774 +/** Lookup an entry in the varp table.
  37.775 + *
  37.776 + * @param z table
  37.777 + * @param vnet vnet id
  37.778 + * @param vmac virtual MAC addres
  37.779 + * @return entry found or null
  37.780 + */
  37.781 +VarpEntry * VarpTable_lookup(VarpTable *z, u32 vnet, Vmac *vmac){
  37.782 +    unsigned long flags;
  37.783 +    VarpKey key = { .vnet = vnet, .vmac = *vmac };
  37.784 +    VarpEntry *ventry;
  37.785 +    VarpTable_read_lock(z, flags);
  37.786 +    ventry = HashTable_get(z->table, &key);
  37.787 +    VarpTable_read_unlock(z, flags);
  37.788 +    if(ventry) VarpEntry_incref(ventry);
  37.789 +    return ventry;
  37.790 +}
  37.791 +
  37.792 +/** Handle output for a reachable ventry.
  37.793 + * Send the skb using the tunnel to the care-of address.
  37.794 + *
  37.795 + * @param ventry varp entry
  37.796 + * @param skb skb to send
  37.797 + * @return 0 on success, error code otherwise
  37.798 + */
  37.799 +int VarpEntry_send(VarpEntry *ventry, struct sk_buff *skb){
  37.800 +    int err = 0;
  37.801 +    unsigned long flags = 0;
  37.802 +    u32 addr;
  37.803 +
  37.804 +    dprintf("> skb=%p\n", skb);
  37.805 +    addr = ventry->addr;
  37.806 +    VarpEntry_unlock(ventry, flags);
  37.807 +    err = vnet_tunnel_send(ventry->key.vnet, addr, skb);
  37.808 +    VarpEntry_lock(ventry, flags);
  37.809 +    dprintf("< err=%d\n", err);
  37.810 +    return err;
  37.811 +}
  37.812 +
  37.813 +/** Handle output for a non-reachable ventry. Send messages to complete it.
  37.814 + * If the entry is still incomplete, queue the skb, otherwise
  37.815 + * send it. If the queue is full, dequeue and free an old skb to
  37.816 + * make room for the new one.
  37.817 + *
  37.818 + * @param ventry varp entry
  37.819 + * @param skb skb to send
  37.820 + * @return 0 on success, error code otherwise
  37.821 + */
  37.822 +int VarpEntry_resolve(VarpEntry *ventry, struct sk_buff *skb){
  37.823 +    int err = 0;
  37.824 +    unsigned long flags = 0;
  37.825 +
  37.826 +    dprintf("> skb=%p\n", skb); //VarpEntry_print(ventry);
  37.827 +    ventry->state = VARP_STATE_INCOMPLETE;
  37.828 +    atomic_set(&ventry->probes, 1);
  37.829 +    if(!VarpEntry_get_flags(ventry, VARP_FLAG_PROBING)){
  37.830 +        VarpEntry_set_flags(ventry, VARP_FLAG_PROBING, 1);
  37.831 +        VarpEntry_incref(ventry);
  37.832 +        VarpEntry_schedule(ventry);
  37.833 +    }
  37.834 +    VarpEntry_unlock(ventry, flags);
  37.835 +    varp_solicit(skb, ventry->key.vnet);
  37.836 +    VarpEntry_lock(ventry, flags);
  37.837 +
  37.838 +    if(ventry->state == VARP_STATE_INCOMPLETE){
  37.839 +        if(skb_queue_len(&ventry->queue) >= ventry->queue_max){
  37.840 +            struct sk_buff *oldskb;
  37.841 +            oldskb = ventry->queue.next;
  37.842 +            __skb_unlink(oldskb, &ventry->queue);
  37.843 +            dprintf("> purging skb=%p\n", oldskb);
  37.844 +            kfree_skb(oldskb);
  37.845 +        }
  37.846 +        __skb_queue_tail(&ventry->queue, skb);
  37.847 +    } else {
  37.848 +        err = VarpEntry_send(ventry, skb);
  37.849 +    }
  37.850 +    dprintf("< err=%d\n", err);
  37.851 +    return err;
  37.852 +}
  37.853 +
  37.854 +/** Handle output for a ventry. Resolves the ventry
  37.855 + * if necessary.
  37.856 + *
  37.857 + * @param ventry varp entry
  37.858 + * @param skb skb to send
  37.859 + * @return 0 on success, error code otherwise
  37.860 + */
  37.861 +int VarpEntry_output(VarpEntry *ventry, struct sk_buff *skb){
  37.862 +    int err = 0;
  37.863 +
  37.864 +    switch(ventry->state){
  37.865 +    case VARP_STATE_REACHABLE:
  37.866 +        err = VarpEntry_send(ventry, skb);
  37.867 +        break;
  37.868 +    default:
  37.869 +        err = VarpEntry_resolve(ventry, skb);
  37.870 +        break;
  37.871 +    }
  37.872 +    return err;
  37.873 +}
  37.874 +
  37.875 +/** Process the output queue for a ventry.  Sends the queued skbs if
  37.876 + * the ventry is reachable, otherwise drops them.
  37.877 + *
  37.878 + * @param ventry varp entry
  37.879 + */
  37.880 +void VarpEntry_process_queue(VarpEntry *ventry){
  37.881 +    struct sk_buff *skb;
  37.882 +    for( ; ; ){
  37.883 +        if(ventry->state != VARP_STATE_REACHABLE) break;
  37.884 +        skb = __skb_dequeue(&ventry->queue);
  37.885 +        if(!skb) break;
  37.886 +        VarpEntry_output(ventry, skb);
  37.887 +    }
  37.888 +    skb_queue_purge(&ventry->queue);
  37.889 +}
  37.890 +
  37.891 +/** Update a ventry. Sets the address and state to those given
  37.892 + * and sets the timestamp to 'now'.
  37.893 + *
  37.894 + * @param ventry varp entry
  37.895 + * @param addr care-of address
  37.896 + * @param state state
  37.897 + * @return 0 on success, error code otherwise
  37.898 + */
  37.899 +int VarpEntry_update(VarpEntry *ventry, u32 addr, int state){
  37.900 +    int err = 0;
  37.901 +    unsigned long now = jiffies;
  37.902 +    unsigned long flags;
  37.903 +
  37.904 +    dprintf("> addr=" IPFMT " state=%d\n", NIPQUAD(addr), state);
  37.905 +    //VarpEntry_print(ventry);
  37.906 +    VarpEntry_lock(ventry, flags);
  37.907 +    if(VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT)) goto exit;
  37.908 +    ventry->addr = addr;
  37.909 +    ventry->timestamp = now;
  37.910 +    ventry->state = state;
  37.911 +    VarpEntry_process_queue(ventry);
  37.912 +  exit:
  37.913 +    //dprintf("> "); VarpEntry_print(ventry);
  37.914 +    VarpEntry_unlock(ventry, flags);
  37.915 +    dprintf("< err=%d\n", err);
  37.916 +    return err;
  37.917 +}
  37.918 +    
  37.919 +int VarpTable_update(VarpTable *z, int vnet, Vmac *vmac, u32 addr,
  37.920 +                     int state, int force){
  37.921 +    int err = 0;
  37.922 +    VarpEntry *ventry;
  37.923 +    
  37.924 +    dprintf("> vnet=%d mac=" MACFMT " addr=" IPFMT " state=%d force=%d\n",
  37.925 +            vnet, MAC6TUPLE(vmac->mac), NIPQUAD(addr), state, force);
  37.926 +    ventry = VarpTable_lookup(z, vnet, vmac);
  37.927 +    if(force && !ventry){
  37.928 +        dprintf("> No entry, adding\n");
  37.929 +        ventry = VarpTable_add(z, vnet, vmac);
  37.930 +    }
  37.931 +    if(ventry){
  37.932 +        dprintf("> Updating\n");
  37.933 +        err = VarpEntry_update(ventry, addr, state);
  37.934 +        VarpEntry_decref(ventry);
  37.935 +    } else {
  37.936 +        dprintf("> No entry found\n");
  37.937 +        err = -ENOENT;
  37.938 +    }
  37.939 +    dprintf("< err=%d\n", err);
  37.940 +    return err;
  37.941 +}
  37.942 +
  37.943 +/** Update the ventry corresponding to the given varp header.
  37.944 + *
  37.945 + * @param z table
  37.946 + * @param varph varp header
  37.947 + * @param state state
  37.948 + * @return 0 on success, -ENOENT if no entry found
  37.949 + */
  37.950 +int VarpTable_update_entry(VarpTable *z, VarpHdr *varph, int state){
  37.951 +    return VarpTable_update(z, ntohl(varph->vnet), &varph->vmac, varph->addr, state, 0);
  37.952 +}
  37.953 +
  37.954 +int varp_update(int vnet, unsigned char *vmac, u32 addr){
  37.955 +    if(!varp_table){
  37.956 +        return -ENOSYS;
  37.957 +    }
  37.958 +    return VarpTable_update(varp_table, vnet, (Vmac*)vmac, addr,
  37.959 +                            VARP_STATE_REACHABLE, 1);
  37.960 +}
  37.961 +
  37.962 +/** Put old varp entries into the incomplete state.
  37.963 + * Permanent entries are not changed.
  37.964 + * If 'all' is non-zero, all non-permanent entries
  37.965 + * are put into the incomplete state, regardless of age.
  37.966 + *
  37.967 + * @param z table
  37.968 + * @param all reset all entries if non-zero
  37.969 + */
  37.970 +void VarpTable_sweep(VarpTable *z, int all){
  37.971 +    HashTable_for_decl(entry);
  37.972 +    VarpEntry *ventry;
  37.973 +    unsigned long now = jiffies;
  37.974 +    unsigned long old = now - VARP_ENTRY_TTL;
  37.975 +    unsigned long flags, vflags;
  37.976 +
  37.977 +    //dprintf(">\n");
  37.978 +    VarpTable_read_lock(z, flags);
  37.979 +    HashTable_for_each(entry, varp_table->table){
  37.980 +        ventry = entry->value;
  37.981 +        VarpEntry_lock(ventry, vflags);
  37.982 +        if(!VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT) &&
  37.983 +           (all || (ventry->timestamp < old))){
  37.984 +            VarpEntry_process_queue(ventry);
  37.985 +            ventry->state = VARP_STATE_INCOMPLETE;
  37.986 +        }
  37.987 +        VarpEntry_unlock(ventry, vflags);
  37.988 +    }
  37.989 +    VarpTable_read_unlock(z, flags);
  37.990 +    //dprintf("<\n");
  37.991 +}
  37.992 +
  37.993 +/** Handle a varp request. Look for a vif with the requested 
  37.994 + * vnet and vmac. If find one, reply with the vnet, vmac and our
  37.995 + * address. Otherwise do nothing.
  37.996 + *
  37.997 + * @param skb incoming message
  37.998 + * @param varph varp message
  37.999 + * @return 0 if ok, -ENOENT if no matching vif, or error code
 37.1000 + */
 37.1001 +int varp_handle_request(struct sk_buff *skb, VarpHdr *varph){
 37.1002 +    int err = -ENOENT;
 37.1003 +    u32 vnet;
 37.1004 +    Vmac *vmac;
 37.1005 +    Vif *vif = NULL;
 37.1006 +
 37.1007 +    dprintf(">\n");
 37.1008 +    vnet = ntohl(varph->vnet);
 37.1009 +    vmac = &varph->vmac;
 37.1010 +    dprintf("> vnet=%d vmac=" MACFMT "\n", vnet, MAC6TUPLE(vmac->mac));
 37.1011 +    if(vif_lookup(vnet, vmac, &vif)) goto exit;
 37.1012 +    varp_send(VARP_OP_ANNOUNCE, skb->dev, skb, vnet, vmac);
 37.1013 +    vif_decref(vif);
 37.1014 +  exit:
 37.1015 +    dprintf("< err=%d\n", err);
 37.1016 +    return err;
 37.1017 +}
 37.1018 +
 37.1019 +/** Announce the vnet and vmac of a vif (gratuitous varp).
 37.1020 + *
 37.1021 + * @param dev device to send on (may be null)
 37.1022 + * @param vif vif
 37.1023 + * @return 0 on success, error code otherwise
 37.1024 + */
 37.1025 +int varp_announce_vif(struct net_device *dev, Vif *vif){
 37.1026 +    int err = 0;
 37.1027 +    dprintf(">\n");
 37.1028 +    if(!varp_table){
 37.1029 +        err = -ENOSYS;
 37.1030 +        goto exit;
 37.1031 +    }
 37.1032 +    err = varp_send(VARP_OP_ANNOUNCE, dev, NULL, vif->vnet, &vif->vmac);
 37.1033 +  exit:
 37.1034 +    dprintf("< err=%d\n", err);
 37.1035 +    return err;
 37.1036 +}
 37.1037 +
 37.1038 +/** Handle a varp announce message.
 37.1039 + * Update the matching ventry if we have one.
 37.1040 + *
 37.1041 + * @param skb incoming message
 37.1042 + * @param varp message
 37.1043 + * @return 0 if OK, -ENOENT if no matching entry
 37.1044 + */
 37.1045 +int varp_handle_announce(struct sk_buff *skb, VarpHdr *varph){
 37.1046 +    int err = 0;
 37.1047 +
 37.1048 +    dprintf(">\n");
 37.1049 +    err = VarpTable_update_entry(varp_table, varph, VARP_STATE_REACHABLE);
 37.1050 +    dprintf("< err=%d\n", err);
 37.1051 +    return err;
 37.1052 +}
 37.1053 +
 37.1054 +/** Handle an incoming varp message.
 37.1055 + *
 37.1056 + * @param skb incoming message
 37.1057 + * @return 0 if OK, error code otherwise
 37.1058 + */
 37.1059 +int varp_handle_message(struct sk_buff *skb){
 37.1060 +    // Assume h. nh set, skb->data point after udp hdr (at varphdr).
 37.1061 +    int err = -EINVAL, mine = 0;
 37.1062 +    VarpHdr *varph = (void*)(skb->h.uh + 1);
 37.1063 +
 37.1064 +    dprintf(">\n");
 37.1065 +    if(!varp_table){
 37.1066 +        err = -ENOSYS;
 37.1067 +        goto exit;
 37.1068 +    }
 37.1069 +    if(MULTICAST(skb->nh.iph->daddr) &&
 37.1070 +       (skb->nh.iph->daddr != varp_mcast_addr)){
 37.1071 +        // Ignore multicast packets not addressed to us.
 37.1072 +        err = 0;
 37.1073 +        dprintf("> daddr=" IPFMT " mcaddr=" IPFMT "\n",
 37.1074 +                NIPQUAD(skb->nh.iph->daddr), NIPQUAD(varp_mcast_addr));
 37.1075 +        goto exit;
 37.1076 +    }
 37.1077 +    if(skb->len < sizeof(*varph)){
 37.1078 +        wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(*varph));
 37.1079 +        goto exit;
 37.1080 +    }
 37.1081 +    mine = 1;
 37.1082 +    if(varph->id != htons(VARP_ID)){
 37.1083 +        // It's not varp at all - ignore it.
 37.1084 +        wprintf("> Unknown id: %d \n", ntohs(varph->id));
 37.1085 +        goto exit;
 37.1086 +    }
 37.1087 +    if(1){
 37.1088 +        dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n",
 37.1089 +                NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr));
 37.1090 +        dprintf("> sport=%u dport=%u\n", ntohs(skb->h.uh->source), ntohs(skb->h.uh->dest));
 37.1091 +        dprintf("> opcode=%d vnet=%u vmac=" MACFMT " addr=" IPFMT "\n",
 37.1092 +                ntohs(varph->opcode),
 37.1093 +                ntohl(varph->vnet),
 37.1094 +                MAC6TUPLE(varph->vmac.mac),
 37.1095 +                NIPQUAD(varph->addr));
 37.1096 +        varp_dprint();
 37.1097 +    }
 37.1098 +    switch(ntohs(varph->opcode)){
 37.1099 +    case VARP_OP_REQUEST:
 37.1100 +        err = varp_handle_request(skb, varph);
 37.1101 +        break;
 37.1102 +    case VARP_OP_ANNOUNCE:
 37.1103 +        err = varp_handle_announce(skb, varph);
 37.1104 +        break;
 37.1105 +    default:
 37.1106 +        wprintf("> Unknown opcode: %d \n", ntohs(varph->opcode));
 37.1107 +       break;
 37.1108 +    }
 37.1109 +  exit:
 37.1110 +    if(mine) err = 1;
 37.1111 +    dprintf("< err=%d\n", err);
 37.1112 +    return err;
 37.1113 +}
 37.1114 +
 37.1115 +/** Send an outgoing packet on the appropriate vnet tunnel.
 37.1116 + *
 37.1117 + * @param skb outgoing message
 37.1118 + * @param vnet vnet (host order)
 37.1119 + * @return 0 on success, error code otherwise
 37.1120 + */
 37.1121 +int varp_output(struct sk_buff *skb, u32 vnet){
 37.1122 +    int err = 0;
 37.1123 +    unsigned char *mac = NULL;
 37.1124 +    Vmac *vmac = NULL;
 37.1125 +    VarpEntry *ventry = NULL;
 37.1126 +
 37.1127 +    dprintf("> skb=%p vnet=%u\n", skb, vnet);
 37.1128 +    if(!varp_table){
 37.1129 +        err = -ENOSYS;
 37.1130 +        goto exit;
 37.1131 +    }
 37.1132 +    dprintf("> skb.mac=%p\n", skb->mac.raw);
 37.1133 +    if(!skb->mac.raw){
 37.1134 +        wprintf("> No ethhdr in skb!\n");
 37.1135 +        err = -EINVAL;
 37.1136 +        goto exit;
 37.1137 +    }
 37.1138 +    mac = MAC_ETH(skb)->h_dest;
 37.1139 +    vmac = (Vmac*)mac;
 37.1140 +    if(mac_is_multicast(mac)){
 37.1141 +        err = vnet_tunnel_send(vnet, varp_mcast_addr, skb);
 37.1142 +    } else {
 37.1143 +        ventry = VarpTable_lookup(varp_table, vnet, vmac);
 37.1144 +        if(!ventry){
 37.1145 +            ventry = VarpTable_add(varp_table, vnet, vmac);
 37.1146 +        }
 37.1147 +        if(ventry){
 37.1148 +            unsigned long flags;
 37.1149 +            VarpEntry_lock(ventry, flags);
 37.1150 +            err = VarpEntry_output(ventry, skb);
 37.1151 +            VarpEntry_unlock(ventry, flags);
 37.1152 +            VarpEntry_decref(ventry);
 37.1153 +        } else {
 37.1154 +            err = -ENOMEM;
 37.1155 +        }
 37.1156 +    }
 37.1157 +  exit:
 37.1158 +    dprintf("< err=%d\n", err);
 37.1159 +    return err;
 37.1160 +}
 37.1161 +
 37.1162 +/** Set the varp multicast address (after initialization).
 37.1163 + *
 37.1164 + * @param addr address (network order)
 37.1165 + * @return 0 on success, error code otherwise
 37.1166 + */
 37.1167 +int varp_set_mcast_addr(uint32_t addr){
 37.1168 +    int err = 0;
 37.1169 +    varp_close();
 37.1170 +    varp_mcast_addr = addr;
 37.1171 +    err = varp_open(varp_mcast_addr, varp_ucast_addr, varp_port);
 37.1172 +    return err;
 37.1173 +}
 37.1174 +
 37.1175 +/** Initialize the varp multicast address from a module parameter.
 37.1176 + *
 37.1177 + * @param s address in IPv4 notation
 37.1178 + * @return 0 on success, error code otherwise
 37.1179 + */
 37.1180 +static void varp_init_mcast_addr(char *s){
 37.1181 +    unsigned long v = 0;
 37.1182 +
 37.1183 +    dprintf("> %s\n", s);
 37.1184 +    if(s && (get_inet_addr(s, &v) >= 0)){
 37.1185 +        varp_mcast_addr = (u32)v;
 37.1186 +    } else {
 37.1187 +        varp_mcast_addr = htonl(VARP_MCAST_ADDR);
 37.1188 +    }
 37.1189 +}
 37.1190 +
 37.1191 +/** Initialize the varp cache.
 37.1192 + *
 37.1193 + * @return 0 on success, error code otherwise
 37.1194 + */
 37.1195 +int varp_init(void){
 37.1196 +    int err = 0;
 37.1197 +    struct net_device *dev = NULL;
 37.1198 +    
 37.1199 +    dprintf(">\n");
 37.1200 +    varp_table = VarpTable_new();
 37.1201 +    if(!varp_table){
 37.1202 +        err = -ENOMEM;
 37.1203 +        goto exit;
 37.1204 +    }
 37.1205 +    varp_init_mcast_addr(varp_mcaddr);
 37.1206 +    err = vnet_get_device(varp_device, &dev);
 37.1207 +    dprintf("> vnet_get_device(%s)=%d\n", varp_device, err);
 37.1208 +    if(err) goto exit;
 37.1209 +    err = vnet_get_device_address(dev, &varp_ucast_addr);
 37.1210 +    dprintf("> vnet_get_device_address()=%d\n", err);
 37.1211 +    if(err) goto exit;
 37.1212 +    varp_port = htons(VARP_PORT);
 37.1213 +
 37.1214 +    err = varp_open(varp_mcast_addr, varp_ucast_addr, varp_port);
 37.1215 +    dprintf("> varp_open()=%d\n", err);
 37.1216 +  exit:
 37.1217 +    if(dev) dev_put(dev);
 37.1218 +    dprintf("< err=%d\n", err);
 37.1219 +    return err;
 37.1220 +}
 37.1221 +
 37.1222 +/** Close the varp cache.
 37.1223 + */
 37.1224 +void varp_exit(void){
 37.1225 +    dprintf(">\n");
 37.1226 +    varp_close();
 37.1227 +    if(varp_table){
 37.1228 +        VarpTable *z = varp_table;
 37.1229 +        varp_table = NULL;
 37.1230 +        VarpTable_free(z);
 37.1231 +    }
 37.1232 +    dprintf("<\n");
 37.1233 +}
 37.1234 +
 37.1235 +MODULE_PARM(varp_mcaddr, "s");
 37.1236 +MODULE_PARM_DESC(varp_mcaddr, "VARP multicast address");
 37.1237 +
 37.1238 +MODULE_PARM(varp_device, "s");
 37.1239 +MODULE_PARM_DESC(varp_device, "VARP network device");
    38.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.2 +++ b/tools/vnet/vnet-module/varp.h	Mon Nov 22 16:49:15 2004 +0000
    38.3 @@ -0,0 +1,144 @@
    38.4 +/*
    38.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    38.6 + *
    38.7 + * This program is free software; you can redistribute it and/or modify
    38.8 + * it under the terms of the GNU General Public License as published by the 
    38.9 + * Free Software Foundation; either version 2 of the License, or (at your
   38.10 + * option) any later version.
   38.11 + * 
   38.12 + * This program is distributed in the hope that it will be useful, but
   38.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   38.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   38.15 + * for more details.
   38.16 + *
   38.17 + * You should have received a copy of the GNU General Public License along
   38.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   38.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   38.20 + *
   38.21 + */
   38.22 +
   38.23 +#ifndef _VNET_VARP_H
   38.24 +#define _VNET_VARP_H
   38.25 +
   38.26 +#define CONFIG_VARP_GRATUITOUS 1
   38.27 +
   38.28 +struct net_device;
   38.29 +struct sk_buff;
   38.30 +struct Vif;
   38.31 +
   38.32 +#define DEVICE "xen-br0"
   38.33 +
   38.34 +extern int vnet_get_device(const char *name, struct net_device **dev);
   38.35 +extern int vnet_get_device_address(struct net_device *dev, u32 *addr);
   38.36 +
   38.37 +extern int varp_handle_message(struct sk_buff *skb);
   38.38 +extern int varp_output(struct sk_buff *skb, u32 vnet);
   38.39 +extern int varp_update(int vnet, unsigned char *vmac, u32 addr);
   38.40 +
   38.41 +extern int varp_init(void);
   38.42 +extern void varp_exit(void);
   38.43 +
   38.44 +extern int varp_open(u32 mcaddr, u32 addr, u16 port);
   38.45 +extern void varp_close(void);
   38.46 +extern int varp_set_mcast_addr(u32 addr);
   38.47 +
   38.48 +extern void varp_print(void);
   38.49 +
   38.50 +extern int varp_announce_vif(struct net_device *dev, struct Vif *vif);
   38.51 +//extern int varp_announce_vifs(struct net_device *dev, struct task_struct *domain);
   38.52 +
   38.53 +extern u32 varp_mcast_addr;
   38.54 +
   38.55 +
   38.56 +/* MAC broadcast addr is ff-ff-ff-ff-ff-ff (all 1's).
   38.57 + * MAC multicast addr has low bit 1, i.e. 01-00-00-00-00-00.
   38.58 + */
   38.59 +
   38.60 +/** Test if a MAC address is a multicast or broadcast address.
   38.61 + *
   38.62 + * @param mac address
   38.63 + * @return 1 if it is, 0 if not
   38.64 + */
   38.65 +static inline int mac_is_multicast(u8 mac[ETH_ALEN]){
   38.66 +    return mac[0] & 1;
   38.67 +}
   38.68 +
   38.69 +/** Test if a MAC address is the broadcast address.
   38.70 + *
   38.71 + * @param mac address
   38.72 + * @return 1 if it is, 0 if not
   38.73 + */
   38.74 +static inline int mac_is_broadcast(u8 mac[ETH_ALEN]){
   38.75 +    u8 mac_bcast_val[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
   38.76 +    return memcmp(mac, mac_bcast_val, ETH_ALEN) == 0;
   38.77 +}
   38.78 +
   38.79 +/** Test if a MAC address is the all-zero address.
   38.80 + *
   38.81 + * @param mac address
   38.82 + * @return 1 if it is, 0 if not
   38.83 + */
   38.84 +static inline int mac_is_zero(u8 mac[ETH_ALEN]){
   38.85 +    u8 mac_zero_val[ETH_ALEN] = {};
   38.86 +    return memcmp(mac, mac_zero_val, ETH_ALEN) == 0;
   38.87 +}
   38.88 +
   38.89 +/** Print format for a mac address. */
   38.90 +#define MACFMT "%02x:%02x:%02x:%02x:%02x:%02x"
   38.91 +
   38.92 +#define MAC6TUPLE(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5]
   38.93 +
   38.94 +/** Get the subnet defined by a netmask and addr.
   38.95 + *
   38.96 + * @param netmask subnet netmask
   38.97 + * @param addr    subnet address
   38.98 + * @return subnet
   38.99 + */
  38.100 +static inline u32 subnet_net(u32 netmask, u32 addr){
  38.101 +    return netmask & addr;
  38.102 +}
  38.103 +
  38.104 +/** Get the address within a subnet.
  38.105 + *
  38.106 + * @param netmask subnet netmask
  38.107 + * @param addr    address
  38.108 + * @return address within the subnet
  38.109 + */
  38.110 +static inline u32 subnet_addr(u32 netmask, u32 addr){
  38.111 +    return ~netmask & addr;
  38.112 +}
  38.113 +
  38.114 +/** Get the broadcast address for a subnet.
  38.115 + *
  38.116 + * @param netmask subnet netmask
  38.117 + * @param netaddr subnet address
  38.118 + * @return subnet broadcast address
  38.119 + */
  38.120 +static inline u32 subnet_broadcast_addr(u32 netmask, u32 netaddr){
  38.121 +    return subnet_net(netmask, netaddr) | ~netmask;
  38.122 +}
  38.123 +
  38.124 +/** Test if an address corresponds to a subnet broadcast.
  38.125 + * True if the host part of the address (the bits outside the netmask)
  38.126 + * is all 1's in binary, even if the address is not in the subnet.
  38.127 + *
  38.128 + * @param netmask subnet mask
  38.129 + * @param addr    address
  38.130 + * @return 1 if it does, 0 otherwise
  38.131 + */
  38.132 +static inline int subnet_broadcast(u32 netmask, u32 addr){
  38.133 +    return subnet_addr(netmask, INADDR_BROADCAST) == subnet_addr(netmask, addr);
  38.134 +}
  38.135 +
  38.136 +/** Test if an address is in a subnet.
  38.137 + *
  38.138 + * @param netmask subnet mask
  38.139 + * @param netaddr subnet address
  38.140 + * @param addr    address
  38.141 + * @return 1 if it is, 0 otherwise
  38.142 + */
  38.143 +static inline int subnet_local(u32 netmask, u32 netaddr, u32 addr){
  38.144 +    return subnet_net(netmask, netaddr) == subnet_net(netmask, addr);
  38.145 +}
  38.146 +
  38.147 +#endif /* ! _VNET_VARP_H */
    39.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.2 +++ b/tools/vnet/vnet-module/varp_socket.c	Mon Nov 22 16:49:15 2004 +0000
    39.3 @@ -0,0 +1,639 @@
    39.4 +/*
    39.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    39.6 + *
    39.7 + * This program is free software; you can redistribute it and/or modify
    39.8 + * it under the terms of the GNU General Public License as published by the 
    39.9 + * Free Software Foundation; either version 2 of the License, or (at your
   39.10 + * option) any later version.
   39.11 + * 
   39.12 + * This program is distributed in the hope that it will be useful, but
   39.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   39.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   39.15 + * for more details.
   39.16 + *
   39.17 + * You should have received a copy of the GNU General Public License along
   39.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   39.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   39.20 + *
   39.21 + */
   39.22 +#include <linux/kernel.h>
   39.23 +#include <linux/types.h>
   39.24 +#include <linux/version.h>
   39.25 +
   39.26 +#include <asm/uaccess.h>
   39.27 +#include <linux/net.h>
   39.28 +#include <linux/in.h>
   39.29 +#include <linux/sched.h>
   39.30 +#include <linux/file.h>
   39.31 +#include <linux/netdevice.h>
   39.32 +#include <linux/smp_lock.h>
   39.33 +#include <net/sock.h>
   39.34 +
   39.35 +#include <if_varp.h>
   39.36 +#include <varp.h>
   39.37 +
   39.38 +/* Get macros needed to define system calls as functions in the kernel. */
   39.39 +#define __KERNEL_SYSCALLS__
   39.40 +static int errno;
   39.41 +#include <linux/unistd.h>
   39.42 +
   39.43 +#define MODULE_NAME "VARP"
   39.44 +#define DEBUG 1
   39.45 +#undef DEBUG
   39.46 +#include "debug.h"
   39.47 +
   39.48 +// Compensate for struct sock fields having 'sk_' added
   39.49 +// to them in 2.6.
   39.50 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   39.51 +
   39.52 +#define SK_RECEIVE_QUEUE sk_receive_queue
   39.53 +#define SK_SLEEP         sk_sleep
   39.54 +
   39.55 +#else
   39.56 +
   39.57 +#define SK_RECEIVE_QUEUE receive_queue
   39.58 +#define SK_SLEEP         sleep
   39.59 +
   39.60 +#endif
   39.61 +
   39.62 +/** @file
   39.63 + * Support for the VARP udp sockets.
   39.64 + */
   39.65 +
   39.66 +static inline mm_segment_t change_fs(mm_segment_t fs){
   39.67 +    mm_segment_t oldfs = get_fs();
   39.68 +    set_fs(fs);
   39.69 +    return oldfs;
   39.70 +}
   39.71 +
   39.72 +/* Replicate the user-space socket API.
   39.73 + * The parts we need anyway.
   39.74 + */
   39.75 +
   39.76 +/* Define the socketcall() syscall.
   39.77 + * Multiplexes all the socket-related calls.
   39.78 + *
   39.79 + * @param call socket call id
   39.80 + * @param args arguments (upto 6)
   39.81 + * @return call-dependent value
   39.82 + */
   39.83 +static inline _syscall2(int, socketcall,
   39.84 +                        int, call,
   39.85 +                        unsigned long *, args)
   39.86 +
   39.87 +int socket(int family, int type, int protocol){
   39.88 +    unsigned long args[6];
   39.89 +    
   39.90 +    args[0] = (unsigned long)family;
   39.91 +    args[1] = (unsigned long)type;
   39.92 +    args[2] = (unsigned long)protocol;
   39.93 +    return socketcall(SYS_SOCKET, args);
   39.94 +}
   39.95 +
   39.96 +int bind(int fd, struct sockaddr *umyaddr, int addrlen){
   39.97 +    unsigned long args[6];
   39.98 +    
   39.99 +    args[0] = (unsigned long)fd;
  39.100 +    args[1] = (unsigned long)umyaddr;
  39.101 +    args[2] = (unsigned long)addrlen;
  39.102 +    return socketcall(SYS_BIND, args);
  39.103 +}
  39.104 +
  39.105 +int connect(int fd, struct sockaddr *uservaddr, int addrlen){
  39.106 +    unsigned long args[6];
  39.107 +    
  39.108 +    args[0] = (unsigned long)fd;
  39.109 +    args[1] = (unsigned long)uservaddr;
  39.110 +    args[2] = (unsigned long)addrlen;
  39.111 +    return socketcall(SYS_CONNECT, args);
  39.112 +}
  39.113 +
  39.114 +int sendto(int fd, void * buff, size_t len,
  39.115 +           unsigned flags, struct sockaddr *addr,
  39.116 +           int addr_len){
  39.117 +    unsigned long args[6];
  39.118 +    
  39.119 +    args[0] = (unsigned long)fd;
  39.120 +    args[1] = (unsigned long)buff;
  39.121 +    args[2] = (unsigned long)len;
  39.122 +    args[3] = (unsigned long)flags;
  39.123 +    args[4] = (unsigned long)addr;
  39.124 +    args[5] = (unsigned long)addr_len;
  39.125 +    return socketcall(SYS_SENDTO, args);
  39.126 +}
  39.127 +
  39.128 +int recvfrom(int fd, void * ubuf, size_t size,
  39.129 +             unsigned flags, struct sockaddr *addr,
  39.130 +             int *addr_len){
  39.131 +    unsigned long args[6];
  39.132 +    
  39.133 +    args[0] = (unsigned long)fd;
  39.134 +    args[1] = (unsigned long)ubuf;
  39.135 +    args[2] = (unsigned long)size;
  39.136 +    args[3] = (unsigned long)flags;
  39.137 +    args[4] = (unsigned long)addr;
  39.138 +    args[5] = (unsigned long)addr_len;
  39.139 +    return socketcall(SYS_RECVFROM, args);
  39.140 +}
  39.141 +
  39.142 +int setsockopt(int fd, int level, int optname, void *optval, int optlen){
  39.143 +    unsigned long args[6];
  39.144 +    
  39.145 +    args[0] = (unsigned long)fd;
  39.146 +    args[1] = (unsigned long)level;
  39.147 +    args[2] = (unsigned long)optname;
  39.148 +    args[3] = (unsigned long)optval;
  39.149 +    args[4] = (unsigned long)optlen;
  39.150 +    return socketcall(SYS_SETSOCKOPT, args);
  39.151 +}
  39.152 +
  39.153 +int getsockopt(int fd, int level, int optname, void *optval, int *optlen){
  39.154 +    unsigned long args[6];
  39.155 +    
  39.156 +    args[0] = (unsigned long)fd;
  39.157 +    args[1] = (unsigned long)level;
  39.158 +    args[2] = (unsigned long)optname;
  39.159 +    args[3] = (unsigned long)optval;
  39.160 +    args[4] = (unsigned long)optlen;
  39.161 +    return socketcall(SYS_GETSOCKOPT, args);
  39.162 +}
  39.163 +
  39.164 +int shutdown(int fd, int how){
  39.165 +    unsigned long args[6];
  39.166 +    
  39.167 +    args[0] = (unsigned long)fd;
  39.168 +    args[1] = (unsigned long)how;
  39.169 +    return socketcall(SYS_SHUTDOWN, args);
  39.170 +}
  39.171 +
  39.172 +int getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len){
  39.173 +    unsigned long args[6];
  39.174 +    
  39.175 +    args[0] = (unsigned long)fd;
  39.176 +    args[1] = (unsigned long)usockaddr;
  39.177 +    args[2] = (unsigned long)usockaddr_len;
  39.178 +    return socketcall(SYS_GETSOCKNAME, args);
  39.179 +}
  39.180 +
  39.181 +/*============================================================================*/
  39.182 +/** Socket flags. */
  39.183 +enum {
  39.184 +    VSOCK_REUSE     =  1,
  39.185 +    VSOCK_BIND      =  2,
  39.186 +    VSOCK_CONNECT   =  4,
  39.187 +    VSOCK_BROADCAST =  8,
  39.188 +    VSOCK_MULTICAST = 16,
  39.189 + };
  39.190 +
  39.191 +/** Convert socket flags to a string.
  39.192 + *
  39.193 + * @param flags flags
  39.194 + * @return static string
  39.195 + */
  39.196 +char * socket_flags(int flags){
  39.197 +    static char s[6];
  39.198 +    int i = 0;
  39.199 +    s[i++] = (flags & VSOCK_CONNECT   ? 'c' : '-');
  39.200 +    s[i++] = (flags & VSOCK_BIND      ? 'b' : '-');
  39.201 +    s[i++] = (flags & VSOCK_REUSE     ? 'r' : '-');
  39.202 +    s[i++] = (flags & VSOCK_BROADCAST ? 'B' : '-');
  39.203 +    s[i++] = (flags & VSOCK_MULTICAST ? 'M' : '-');
  39.204 +    s[i++] = '\0';
  39.205 +    return s;
  39.206 +}
  39.207 +
  39.208 +/** The varp multicast socket. */
  39.209 +int varp_mcast_sock = -1;
  39.210 +
  39.211 +/** The varp unicast socket. */
  39.212 +int varp_ucast_sock = -1;
  39.213 +
  39.214 +/** Control flag for whether varp should be running.
  39.215 + * If this is set to 0 then the varp thread will notice and
  39.216 + * (eventually) exit. The thread signals that it has exited by
  39.217 + * setting varp_running to 0.
  39.218 + */
  39.219 +atomic_t varp_run = ATOMIC_INIT(0);
  39.220 +
  39.221 +/** State flag indicating whether the varp thread is running. */
  39.222 +atomic_t varp_running = ATOMIC_INIT(0);
  39.223 +
  39.224 +/** Set socket option to reuse address.
  39.225 + *
  39.226 + * @param sock socket
  39.227 + * @param reuse flag
  39.228 + * @return 0 on success, error code otherwise
  39.229 + */
  39.230 +int setsock_reuse(int sock, int reuse){
  39.231 +    int err = 0;
  39.232 +    err = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse));
  39.233 +    if(err < 0){
  39.234 +        eprintf("> setsockopt SO_REUSEADDR: %d %d\n", err, errno);
  39.235 +    }
  39.236 +    return err;
  39.237 +}
  39.238 +
  39.239 +/** Set socket broadcast option.
  39.240 + *
  39.241 + * @param sock socket
  39.242 + * @param bcast flag
  39.243 + * @return 0 on success, error code otherwise
  39.244 + */
  39.245 +int setsock_broadcast(int sock, int bcast){
  39.246 +    int err = 0;
  39.247 +    err = setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &bcast, sizeof(bcast));
  39.248 +    if(err < 0){
  39.249 +        eprintf("> setsockopt SO_BROADCAST: %d %d\n", err, errno);
  39.250 +    }
  39.251 +    return err;
  39.252 +}
  39.253 +
  39.254 +/** Join a socket to a multicast group.
  39.255 + *
  39.256 + * @param sock socket
  39.257 + * @param saddr multicast address
  39.258 + * @return 0 always: failures are logged but not propagated (see the todo at exit)
  39.259 + */
  39.260 +int setsock_multicast(int sock, uint32_t saddr){
  39.261 +    int err = 0;
  39.262 +    struct net_device *dev = NULL;
  39.263 +    u32 addr = 0;
  39.264 +    struct ip_mreqn mreq = {};
  39.265 +    int mloop = 0;
  39.266 +
  39.267 +    err = vnet_get_device(DEVICE, &dev);
  39.268 +    if(err){
  39.269 +        eprintf("> error getting device: %d %d\n", err, errno);
  39.270 +        goto exit;
  39.271 +    }
  39.272 +    err = vnet_get_device_address(dev, &addr);
  39.273 +    if(err){
  39.274 +        eprintf("> error getting device address: %d %d\n", err, errno);
  39.275 +        goto exit;
  39.276 +    }
  39.277 +    // See 'man 7 ip' for these options.
  39.278 +    mreq.imr_multiaddr.s_addr = saddr;       // IP multicast address.
  39.279 +    mreq.imr_address.s_addr   = INADDR_ANY;  // Interface IP address.
  39.280 +    mreq.imr_ifindex = 0;                    // Interface index (0 means any).
  39.281 +    dprintf("> saddr=%u.%u.%u.%u addr=%u.%u.%u.%u ifindex=%d\n",
  39.282 +            NIPQUAD(saddr), NIPQUAD(addr), mreq.imr_ifindex);
  39.283 +    err = setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &mloop, sizeof(mloop));
  39.284 +    if(err < 0){
  39.285 +        eprintf("> setsockopt IP_MULTICAST_LOOP: %d %d\n", err, errno);
  39.286 +        goto exit;
  39.287 +    }
  39.288 +    err = setsockopt(sock, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
  39.289 +    if(err < 0){
  39.290 +        eprintf("> setsockopt IP_ADD_MEMBERSHIP: %d %d\n", err, errno);
  39.291 +        goto exit;
  39.292 +    }
  39.293 +  exit:
  39.294 +    if(dev) dev_put(dev); // Release the device obtained above, as varp_init() does.
  39.295 +    err = 0; //todo: remove hack
  39.296 +    return err;
  39.297 +}
  39.298 +
  39.299 +/** Set a socket's multicast ttl (default is 1).
  39.300 + * @param sock socket
  39.301 + * @param ttl ttl
  39.302 + * @return 0 on success, error code otherwise
  39.303 + */
  39.304 +int setsock_multicast_ttl(int sock, uint8_t ttl){
  39.305 +    int err = 0;
  39.306 +    err = setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl));
  39.307 +    return err;
  39.308 +}
  39.309 +
  39.310 +/** Create a socket.
  39.311 + * The flags can include VSOCK_REUSE, VSOCK_BIND, VSOCK_CONNECT, VSOCK_BROADCAST, VSOCK_MULTICAST.
  39.312 + *
  39.313 + * @param socktype socket type
  39.314 + * @param saddr address
  39.315 + * @param port port
  39.316 + * @param flags flags
  39.317 + * @param val return parameter for the socket descriptor (-1 on error)
  39.318 + * @return 0 on success, error code otherwise
  39.319 + */
  39.320 +int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, int *val){
  39.321 +    int err = 0;
  39.322 +    int sock;
  39.323 +    struct sockaddr_in addr_in;
  39.324 +    struct sockaddr *addr = (struct sockaddr *)&addr_in;
  39.325 +    int addr_n = sizeof(addr_in);
  39.326 +    int reuse, bcast;
  39.327 +    int sockproto = 0;
  39.328 +
  39.329 +    //dprintf(">\n");
  39.330 +    reuse = (flags & VSOCK_REUSE);
  39.331 +    bcast = (flags & VSOCK_BROADCAST);
  39.332 +    addr_in.sin_family      = AF_INET;
  39.333 +    addr_in.sin_addr.s_addr = saddr;
  39.334 +    addr_in.sin_port        = port;
  39.335 +    dprintf("> flags=%s addr=%u.%u.%u.%u port=%d\n",
  39.336 +            socket_flags(flags),
  39.337 +            NIPQUAD(saddr), ntohs(port));
  39.338 +
  39.339 +    switch(socktype){
  39.340 +    case SOCK_DGRAM:  sockproto = IPPROTO_UDP; break;
  39.341 +    case SOCK_STREAM: sockproto = IPPROTO_TCP; break;
  39.342 +    }
  39.343 +    sock = socket(AF_INET, socktype, sockproto);
  39.344 +    if(sock < 0){ err = sock; goto exit; } // Report socket() failure instead of returning 0.
  39.345 +    if(reuse){
  39.346 +        err = setsock_reuse(sock, reuse);
  39.347 +        if(err < 0) goto exit;
  39.348 +    }
  39.349 +    if(bcast){
  39.350 +        err = setsock_broadcast(sock, bcast);
  39.351 +        if(err < 0) goto exit;
  39.352 +    }
  39.353 +    if(flags & VSOCK_MULTICAST){
  39.354 +        err = setsock_multicast(sock, saddr);
  39.355 +        if(err < 0) goto exit;
  39.356 +    }
  39.357 +    if(flags & VSOCK_CONNECT){
  39.358 +        err = connect(sock, addr, addr_n);
  39.359 +        if(err < 0) goto exit;
  39.360 +    }
  39.361 +    if(flags & VSOCK_BIND){
  39.362 +        err = bind(sock, addr, addr_n);
  39.363 +        if(err < 0) goto exit;
  39.364 +    }
  39.365 +  exit:
  39.366 +    *val = (err ? -1 : sock);
  39.367 +    if(err) eprintf("> err=%d errno=%d\n", err, errno);
  39.368 +    return err;
  39.369 +}
  39.370 +
  39.371 +/** Open the varp multicast socket.
  39.372 + *
  39.373 + * @param mcaddr multicast address 
  39.374 + * @param saddr address 
  39.375 + * @param port port
  39.376 + * @param val return parameter for the socket
  39.377 + * @return 0 on success, error code otherwise
  39.378 + */
  39.379 +int varp_mcast_open(uint32_t mcaddr, uint32_t saddr, uint16_t port, int *val){
  39.380 +    int err = 0;
  39.381 +    int flags = VSOCK_REUSE;
  39.382 +    int multicast = MULTICAST(mcaddr);
  39.383 +    int sock = 0;
  39.384 +    struct sockaddr_in addr_in;
  39.385 +    struct sockaddr *addr = (struct sockaddr *)&addr_in;
  39.386 +    int addr_n = sizeof(addr_in);
  39.387 +    
  39.388 +    dprintf(">\n");
  39.389 +    flags |= VSOCK_MULTICAST;
  39.390 +    flags |= VSOCK_BROADCAST;
  39.391 +    
  39.392 +    err = create_socket(SOCK_DGRAM, mcaddr, port, flags, &sock);
  39.393 +    if(err < 0) goto exit;
  39.394 +    if(multicast){
  39.395 +        err = setsock_multicast_ttl(sock, 1);
  39.396 +        if(err < 0) goto exit;
  39.397 +    }
  39.398 +    if(0){
  39.399 +        addr_in.sin_family      = AF_INET;
  39.400 +        addr_in.sin_addr.s_addr = saddr;
  39.401 +        addr_in.sin_port        = port;
  39.402 +        err = bind(sock, addr, addr_n);
  39.403 +        if(err < 0){
  39.404 +            eprintf("> bind: %d %d\n", err, errno);
  39.405 +            goto exit;
  39.406 +        }
  39.407 +    }
  39.408 +    if(0){
  39.409 +        struct sockaddr_in self = {};
  39.410 +        int self_n;
  39.411 +        getsockname(sock, (struct sockaddr *)&self, &self_n);
  39.412 +        dprintf("> sockname sock=%d addr=%u.%u.%u.%u port=%d\n",
  39.413 +                sock, NIPQUAD(saddr), ntohs(port));
  39.414 +    }
  39.415 +  exit:
  39.416 +    if(err){
  39.417 +        shutdown(sock, 2);
  39.418 +    }
  39.419 +    *val = (err ? -1 : sock);
  39.420 +    dprintf("< err=%d val=%d\n", err, *val);
  39.421 +    return err;
  39.422 +}
  39.423 +
  39.424 +/** Open the varp unicast socket.
  39.425 + *
  39.426 + * @param addr address 
  39.427 + * @param port port
  39.428 + * @param val return parameter for the socket
  39.429 + * @return 0 on success, error code otherwise
  39.430 + */
  39.431 +int varp_ucast_open(uint32_t addr, u16 port, int *val){
  39.432 +    int err = 0;
  39.433 +    int flags = VSOCK_BIND | VSOCK_REUSE;
  39.434 +    dprintf(">\n");
  39.435 +    err = create_socket(SOCK_DGRAM, addr, port, flags, val);
  39.436 +    dprintf("< err=%d val=%d\n", err, *val);
  39.437 +    return err;
  39.438 +}
  39.439 +
  39.440 +/* Here because inline in 'socket.c'. */
  39.441 +#ifndef sockfd_put
  39.442 +#define sockfd_put(sock) fput((sock)->file)
  39.443 +#endif
  39.444 +
  39.445 +/** Get the next skb from a socket's receive queue.
  39.446 + *
  39.447 + * @param fd socket file descriptor
  39.448 + * @return skb or NULL
  39.449 + */
  39.450 +static struct sk_buff *get_sock_skb(int fd){
  39.451 +    int err = 0;
  39.452 +    struct sk_buff *skb = NULL;
  39.453 +    struct socket *sock = NULL;
  39.454 +
  39.455 +    sock = sockfd_lookup(fd, &err);
  39.456 +    if (!sock){
  39.457 +        dprintf("> no sock for fd=%d\n", fd);
  39.458 +        goto exit;
  39.459 +    }
  39.460 +    skb = skb_dequeue(&sock->sk->SK_RECEIVE_QUEUE);
  39.461 +    //skb = skb_recv_datagram(sock->sk, 0, 1, &recv_err);
  39.462 +    sockfd_put(sock);
  39.463 +  exit:
  39.464 +    return skb;
  39.465 +}
  39.466 +
  39.467 +/** Handle the next skb on a socket (if any).
  39.468 + *
  39.469 + * @param fd socket file descriptor
  39.470 + * @return 1 if there was an skb, 0 otherwise
  39.471 + */
  39.472 +static int handle_sock_skb(int fd){
  39.473 +    int ret = 0;
  39.474 +    struct sk_buff *skb = get_sock_skb(fd);
  39.475 +    if(skb){
  39.476 +        ret = 1;
  39.477 +        dprintf("> skb fd=%d skb=%p\n", fd, skb);
  39.478 +        varp_handle_message(skb);
  39.479 +        kfree_skb(skb);
  39.480 +    }
  39.481 +    return ret;
  39.482 +}
  39.483 +
  39.484 +/** Add a wait queue to a socket.
  39.485 + *
  39.486 + * @param fd socket file descriptor
  39.487 + * @param waitq queue
  39.488 + * @return 0 on success, error code otherwise
  39.489 + */
  39.490 +int sock_add_wait_queue(int fd, wait_queue_t *waitq){
  39.491 +    int err = 0;
  39.492 +    struct socket *sock = NULL;
  39.493 +
  39.494 +    dprintf("> fd=%d\n", fd);
  39.495 +    sock = sockfd_lookup(fd, &err);
  39.496 +    if (!sock) goto exit;
  39.497 +    add_wait_queue(sock->sk->SK_SLEEP, waitq);
  39.498 +    sockfd_put(sock);
  39.499 +  exit:
  39.500 +    dprintf("< err=%d\n", err);
  39.501 +    return err;
  39.502 +}
  39.503 +
  39.504 +/** Remove a wait queue from a socket.
  39.505 + *
  39.506 + * @param fd socket file descriptor
  39.507 + * @param waitq queue
  39.508 + * @return 0 on success, error code otherwise
  39.509 + */
  39.510 +int sock_remove_wait_queue(int fd, wait_queue_t *waitq){
  39.511 +    int err = 0;
  39.512 +    struct socket *sock = NULL;
  39.513 +
  39.514 +    sock = sockfd_lookup(fd, &err);
  39.515 +    if (!sock) goto exit;
  39.516 +    remove_wait_queue(sock->sk->SK_SLEEP, waitq);
  39.517 +    sockfd_put(sock);
  39.518 +  exit:
  39.519 +    return err;
  39.520 +}
  39.521 +
  39.522 +/** Loop handling the varp sockets.
  39.523 + * We use kernel API for this (waitqueue, schedule_timeout) instead
  39.524 + * of select because the select syscall was returning EFAULT. Oh well.
  39.525 + *
  39.526 + * @param arg arguments
  39.527 + * @return exit code
  39.528 + */
  39.529 +int varp_main(void *arg){
  39.530 +    int err = 0;
  39.531 +    long timeout = 3 * HZ;
  39.532 +    int count = 0;
  39.533 +    int n = 0;
  39.534 +    DECLARE_WAITQUEUE(mcast_wait, current);
  39.535 +    DECLARE_WAITQUEUE(ucast_wait, current);
  39.536 +
  39.537 +    dprintf("> start\n");
  39.538 +    atomic_set(&varp_running, 1);
  39.539 +    err = sock_add_wait_queue(varp_mcast_sock, &mcast_wait);
  39.540 +    err = sock_add_wait_queue(varp_ucast_sock, &ucast_wait);
  39.541 +    for(n = 1; atomic_read(&varp_run) == 1; n++){
  39.542 +        //dprintf("> n=%d\n", n);
  39.543 +        count = 0;
  39.544 +        count += handle_sock_skb(varp_mcast_sock);
  39.545 +        count += handle_sock_skb(varp_ucast_sock);
  39.546 +        if(!count){
  39.547 +            // No skbs were handled, so go back to sleep.
  39.548 +            set_current_state(TASK_INTERRUPTIBLE);
  39.549 +            schedule_timeout(timeout);
  39.550 +            current->state = TASK_RUNNING;
  39.551 +        }
  39.552 +    }
  39.553 +    sock_remove_wait_queue(varp_mcast_sock, &mcast_wait);
  39.554 +    sock_remove_wait_queue(varp_ucast_sock, &ucast_wait);
  39.555 +    atomic_set(&varp_running, 0);
  39.556 +    //MOD_DEC_USE_COUNT;
  39.557 +    dprintf("< stop err=%d\n", err);
  39.558 +    return err;
  39.559 +}
  39.560 +
  39.561 +/** Start the varp thread.
  39.562 + *
  39.563 + * @return 0 on success, error code from kernel_thread() otherwise
  39.564 + */
  39.565 +int varp_start(void){
  39.566 +    int err = 0;
  39.567 +    void *args = NULL;
  39.568 +    int flags = 0;
  39.569 +    long pid = 0;
  39.570 +    
  39.571 +    dprintf(">\n");
  39.572 +    flags |= CLONE_FS;
  39.573 +    flags |= CLONE_FILES;
  39.574 +    flags |= CLONE_SIGHAND;
  39.575 +    atomic_set(&varp_run, 1);
  39.576 +    atomic_set(&varp_running, 0);
  39.577 +    pid = kernel_thread(varp_main, args, flags);
  39.578 +    if(pid < 0) err = (int)pid; // A negative pid is an error code: the thread was not created.
  39.579 +    dprintf("< pid=%ld\n", pid);
  39.580 +    return err;
  39.581 +}
  39.582 +
  39.583 +/** Close the varp sockets and stop the thread handling them.
  39.584 + */
  39.585 +void varp_close(void){
  39.586 +    mm_segment_t oldfs;
  39.587 +    long timeout = 1 * HZ;
  39.588 +    int tries = 10;
  39.589 +    dprintf(">\n");
  39.590 +    // Tell the varp thread to stop and wait a while for it.
  39.591 +    atomic_set(&varp_run, 0);
  39.592 +    while(atomic_read(&varp_running) && tries-- > 0){
  39.593 +        set_current_state(TASK_INTERRUPTIBLE);
  39.594 +        schedule_timeout(timeout);
  39.595 +        current->state = TASK_RUNNING;
  39.596 +    }
  39.597 +    // Shut down the sockets. -1 marks "not open"; descriptor 0 is valid.
  39.598 +    oldfs = change_fs(KERNEL_DS);
  39.599 +    if(varp_mcast_sock >= 0){
  39.600 +        shutdown(varp_mcast_sock, 2);
  39.601 +        varp_mcast_sock = -1;
  39.602 +    }
  39.603 +    if(varp_ucast_sock >= 0){
  39.604 +        shutdown(varp_ucast_sock, 2);
  39.605 +        varp_ucast_sock = -1;
  39.606 +    }
  39.607 +    set_fs(oldfs);
  39.608 +    //MOD_DEC_USE_COUNT;
  39.609 +    dprintf("<\n");
  39.610 +}    
  39.611 +
  39.612 +/** Open the varp sockets and start the thread handling them.
  39.613 + *
  39.614 + * @param mcaddr multicast address
  39.615 + * @param addr unicast address
  39.616 + * @param port port
  39.617 + * @return 0 on success, error code otherwise
  39.618 + */
  39.619 +int varp_open(u32 mcaddr, u32 addr, u16 port){
  39.620 +    int err = 0;
  39.621 +    mm_segment_t oldfs;
  39.622 +
  39.623 +    //MOD_INC_USE_COUNT;
  39.624 +    dprintf("> mcaddr=%u.%u.%u.%u addr=%u.%u.%u.%u port=%u\n",
  39.625 +            NIPQUAD(mcaddr), NIPQUAD(addr), ntohs(port));
  39.626 +    //MOD_INC_USE_COUNT;
  39.627 +    oldfs = change_fs(KERNEL_DS);
  39.628 +    err = varp_mcast_open(mcaddr, addr, port, &varp_mcast_sock);
  39.629 +    if(err < 0 ) goto exit;
  39.630 +    err = varp_ucast_open(INADDR_ANY, port, &varp_ucast_sock);
  39.631 +    if(err < 0 ) goto exit;
  39.632 +    set_fs(oldfs);
  39.633 +    err = varp_start();
  39.634 +  exit:
  39.635 +    set_fs(oldfs);
  39.636 +    if(err){
  39.637 +        varp_close();
  39.638 +    }
  39.639 +    dprintf("< err=%d\n", err);
  39.640 +    return err;
  39.641 +}	
  39.642 +
    40.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    40.2 +++ b/tools/vnet/vnet-module/vif.c	Mon Nov 22 16:49:15 2004 +0000
    40.3 @@ -0,0 +1,267 @@
    40.4 +/*
    40.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    40.6 + *
    40.7 + * This program is free software; you can redistribute it and/or modify
    40.8 + * it under the terms of the GNU General Public License as published by the 
    40.9 + * Free Software Foundation; either version 2 of the License, or (at your
   40.10 + * option) any later version.
   40.11 + * 
   40.12 + * This program is distributed in the hope that it will be useful, but
   40.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   40.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   40.15 + * for more details.
   40.16 + *
   40.17 + * You should have received a copy of the GNU General Public License along
   40.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   40.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   40.20 + *
   40.21 + */
   40.22 +
   40.23 +#include <linux/config.h>
   40.24 +#include <linux/kernel.h>
   40.25 +#include <linux/module.h>
   40.26 +#include <linux/init.h>
   40.27 +#include <linux/string.h>
   40.28 +
   40.29 +#include <linux/net.h>
   40.30 +#include <linux/in.h>
   40.31 +#include <linux/inet.h>
   40.32 +#include <linux/netdevice.h>
   40.33 +#include <linux/udp.h>
   40.34 +
   40.35 +#include <net/ip.h>
   40.36 +#include <net/protocol.h>
   40.37 +#include <net/route.h>
   40.38 +#include <linux/skbuff.h>
   40.39 +
   40.40 +#include <etherip.h>
   40.41 +#include <if_varp.h>
   40.42 +#include <vnet_dev.h>
   40.43 +#include <vif.h>
   40.44 +#include "allocate.h"
   40.45 +#include "hash_table.h"
   40.46 +#include "sys_net.h"
   40.47 +#include "sys_string.h"
   40.48 +
   40.49 +#define MODULE_NAME "VNET"
   40.50 +#define DEBUG 1
   40.51 +#undef DEBUG
   40.52 +#include "debug.h"
   40.53 +
   40.54 +/** Table of vifs indexed by VifKey. */
   40.55 +HashTable *vif_table = NULL;
   40.56 +
   40.57 +void vif_decref(Vif *vif){
   40.58 +    if(!vif) return;
   40.59 +    if(atomic_dec_and_test(&vif->refcount)){
   40.60 +        kfree(vif);
   40.61 +    }
   40.62 +}
   40.63 +
    40.64 +void vif_incref(Vif *vif){ // Take an additional reference on a vif.
    40.65 +    if(!vif) return; // A null vif is accepted and ignored.
    40.66 +    atomic_inc(&vif->refcount);
    40.67 +}
   40.68 +
   40.69 +/** Hash function for keys in the vif table.
   40.70 + * Hashes the vnet id and mac.
   40.71 + *
   40.72 + * @param k key (VifKey)
   40.73 + * @return hashcode
   40.74 + */
   40.75 +Hashcode vif_key_hash_fn(void *k){
   40.76 +    VifKey *key = k;
   40.77 +    Hashcode h;
   40.78 +    h = hash_2ul(key->vnet,
   40.79 +                 (key->vmac.mac[0] << 24) |
   40.80 +                 (key->vmac.mac[1] << 16) |
   40.81 +                 (key->vmac.mac[2] <<  8) |
   40.82 +                 (key->vmac.mac[3]      ));
   40.83 +    h = hash_hul(h, 
   40.84 +                 (key->vmac.mac[4] <<   8) |
   40.85 +                 (key->vmac.mac[5]       ));
   40.86 +    return h;
   40.87 +}
   40.88 +
   40.89 +
   40.90 +/** Test equality for keys in the vif table.
   40.91 + * Compares vnet and mac.
   40.92 + *
   40.93 + * @param k1 key to compare (VifKey)
   40.94 + * @param k2 key to compare (VifKey)
   40.95 + * @return 1 if equal, 0 otherwise
   40.96 + */
    40.97 +int vif_key_equal_fn(void *k1, void *k2){ // installed as vif_table->key_equal_fn by vif_init()
    40.98 +    VifKey *key1 = k1;
    40.99 +    VifKey *key2 = k2;
   40.100 +    return (key1->vnet == key2->vnet) && (memcmp(key1->vmac.mac, key2->vmac.mac, ETH_ALEN) == 0); // vnet id and all ETH_ALEN mac bytes must match
   40.101 +}
  40.102 +
  40.103 +/** Free an entry in the vif table.
  40.104 + *
  40.105 + * @param table containing table
  40.106 + * @param entry entry to free
  40.107 + */
   40.108 +static void vif_entry_free_fn(HashTable *table, HTEntry *entry){ // installed as vif_table->entry_free_fn by vif_init(); 'table' is not used
   40.109 +    Vif *vif;
   40.110 +    if(!entry) return;
   40.111 +    vif = entry->value;
   40.112 +    if(vif){
   40.113 +        vif_decref(vif); // drop one reference on the vif stored in the entry
   40.114 +    }
   40.115 +    HTEntry_free(entry); // then release the entry itself
   40.116 +}
  40.117 +
   40.118 +/** Lookup a vif.
          + * On success a reference is taken on the vif with vif_incref().
          + * NOTE(review): presumably the caller is expected to vif_decref() it - confirm.
   40.119 + *
   40.120 + * @param vnet vnet id
   40.121 + * @param vmac MAC address (copied into the lookup key)
          + * @param vif return parameter for the vif (set to NULL if not found)
   40.122 + * @return 0 on success, -ENOENT otherwise
   40.123 + */
   40.124 +int vif_lookup(int vnet, Vmac *vmac, Vif **vif){
   40.125 +    int err = 0;
   40.126 +    VifKey key = {};
   40.127 +    HTEntry *entry = NULL;
   40.128 +    
   40.129 +    key.vnet = vnet;
   40.130 +    key.vmac = *vmac;
   40.131 +    entry = HashTable_get_entry(vif_table, &key);
   40.132 +    if(entry){
   40.133 +        *vif = entry->value;
   40.134 +        vif_incref(*vif);
   40.135 +    } else {
   40.136 +        *vif = NULL;
   40.137 +        err = -ENOENT;
   40.138 +    }
   40.139 +    //dprintf("< err=%d addr=" IPFMT "\n", err, NIPQUAD(*coaddr));
   40.140 +    return err;
   40.141 +}
  40.142 +
   40.143 +/** Create a new vif and add it to the vif table.
          + * The vif is allocated with a refcount of 1 and the count is incremented
          + * again once the table insert succeeds, so a successful call returns a
          + * vif with a refcount of 2.
          + * NOTE(review): presumably one reference is for the table and one for
          + * the caller - confirm.
          + * No check for an existing entry is made here (vif_create() does that).
   40.144 + *
   40.145 + * @param vnet vnet id
   40.146 + * @param vmac MAC address
          + * @param val return parameter for the new vif (set to NULL on error)
   40.147 + * @return 0 on success, negative error code otherwise
   40.148 + */
   40.149 +int vif_add(int vnet, Vmac *vmac, Vif **val){
   40.150 +    int err = 0;
   40.151 +    Vif *vif = NULL;
   40.152 +    HTEntry *entry;
   40.153 +    dprintf("> vnet=%d\n", vnet);
   40.154 +    vif = ALLOCATE(Vif);
   40.155 +    if(!vif){
   40.156 +        err = -ENOMEM;
   40.157 +        goto exit;
   40.158 +    }
   40.159 +    atomic_set(&vif->refcount, 1);
   40.160 +    vif->vnet = vnet;
   40.161 +    vif->vmac = *vmac;
   40.162 +    entry = HashTable_add(vif_table, vif, vif); // the vif doubles as its own key: it starts with the same vnet/vmac fields as VifKey
   40.163 +    if(!entry){
   40.164 +        err = -ENOMEM;
   40.165 +        deallocate(vif);
   40.166 +        vif = NULL;
   40.167 +        goto exit;
   40.168 +    }
   40.169 +    vif_incref(vif);
   40.170 +  exit:
   40.171 +    *val = (err ? NULL : vif);
   40.172 +    dprintf("< err=%d\n", err);
   40.173 +    return err;
   40.174 +}
  40.175 +
   40.176 +/** Delete an entry.
   40.177 + *
   40.178 + * @param vnet vnet id
   40.179 + * @param vmac MAC address
   40.180 + * The entry is selected by a key built from vnet and vmac.
   40.181 + * @return the result of HashTable_remove(): number of entries deleted, or negative error code
   40.182 + */
   40.183 +int vif_remove(int vnet, Vmac *vmac){
   40.184 +    int err = 0;
   40.185 +    VifKey key = { .vnet = vnet, .vmac = *vmac };
   40.186 +    //dprintf("> vnet=%d addr=%u.%u.%u.%u\n", vnet, NIPQUAD(coaddr));
   40.187 +    err = HashTable_remove(vif_table, &key);
   40.188 +    //dprintf("< err=%d\n", err);
   40.189 +    return err;
   40.190 +}
  40.191 +
   40.192 +int vif_find(int vnet, Vmac *vmac, int create, Vif **vif){ // Look up a vif, optionally creating it when missing.
   40.193 +    int err = 0;
   40.194 +
   40.195 +    err = vif_lookup(vnet, vmac, vif);
   40.196 +    if(err && create){ // not found and the caller asked for creation
   40.197 +        err = vif_add(vnet, vmac, vif);
   40.198 +    }
   40.199 +    return err; // 0 with *vif set, or the error from vif_lookup()/vif_add()
   40.200 +}
  40.201 +
   40.202 +void vif_purge(void){ // Clear the vif table; the table itself is kept.
   40.203 +    HashTable_clear(vif_table); // NOTE(review): presumably frees each entry via vif_entry_free_fn - confirm in hash_table.c
   40.204 +}
  40.205 +
  40.206 +int vif_create(int vnet, Vmac *vmac, Vif **vif){
  40.207 +    int err = 0;
  40.208 +
  40.209 +    dprintf(">\n");
  40.210 +    if(!vif_lookup(vnet, vmac, vif)){
  40.211 +        err = -EEXIST;
  40.212 +        goto exit;
  40.213 +    }
  40.214 +    dprintf("> vif_add...\n");
  40.215 +    err = vif_add(vnet, vmac, vif);
  40.216 +  exit:
  40.217 +    if(err){
  40.218 +        *vif = NULL;
  40.219 +    }
  40.220 +    dprintf("< err=%d\n", err);
  40.221 +    return err;
  40.222 +}
  40.223 +
  40.224 +/** Create a vif.
  40.225 + *
  40.226 + * @param vnet vnet id
  40.227 + * @param mac mac address (as a string)
  40.228 + * @return 0 on success, error code otherwise
  40.229 + */
  40.230 +int mkvif(int vnet, char *mac){
  40.231 +    int err = 0;
  40.232 +    Vmac vmac = {};
  40.233 +    Vif *vif = NULL;
  40.234 +    dprintf("> vnet=%d mac=%s\n", vnet, mac);
  40.235 +    err = mac_aton(mac, vmac.mac);
  40.236 +    if(err) goto exit;
  40.237 +    err = vif_create(vnet, &vmac, &vif);
  40.238 +  exit:
  40.239 +    dprintf("< err=%d\n", err);
  40.240 +    return err;
  40.241 +}
  40.242 +
   40.243 +/** Initialize the vif table.
          + * Allocates the table and installs the vif-specific entry-free, key-hash
          + * and key-equality callbacks.
   40.244 + *
   40.245 + * @return 0 on success, error code otherwise
   40.246 + */
   40.247 +int vif_init(void){
   40.248 +    int err = 0;
   40.249 +    dprintf(">\n");
   40.250 +    vif_table = HashTable_new(0); // NOTE(review): 0 presumably requests a default size - confirm in hash_table.c
   40.251 +    if(!vif_table){
   40.252 +        err = -ENOMEM;
   40.253 +        goto exit;
   40.254 +    }
   40.255 +    vif_table->entry_free_fn = vif_entry_free_fn;
   40.256 +    vif_table->key_hash_fn = vif_key_hash_fn;
   40.257 +    vif_table->key_equal_fn = vif_key_equal_fn;
   40.258 +
   40.259 +    // Some vifs for testing.
   40.260 +    //mkvif(1, "aa:00:00:00:20:11");
   40.261 +    //mkvif(2, "aa:00:00:00:20:12");
   40.262 +  exit:
   40.263 +    if(err < 0) wprintf("< err=%d\n", err);
   40.264 +    dprintf("< err=%d\n", err);
   40.265 +    return err;
   40.266 +}
  40.267 +
  40.268 +void vif_exit(void){
  40.269 +    HashTable_free(vif_table);
  40.270 +}
    41.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.2 +++ b/tools/vnet/vnet-module/vif.h	Mon Nov 22 16:49:15 2004 +0000
    41.3 @@ -0,0 +1,55 @@
    41.4 +/*
    41.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    41.6 + *
    41.7 + * This program is free software; you can redistribute it and/or modify
    41.8 + * it under the terms of the GNU General Public License as published by the 
    41.9 + * Free Software Foundation; either version 2 of the License, or (at your
   41.10 + * option) any later version.
   41.11 + * 
   41.12 + * This program is distributed in the hope that it will be useful, but
   41.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   41.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   41.15 + * for more details.
   41.16 + *
   41.17 + * You should have received a copy of the GNU General Public License along
   41.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   41.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   41.20 + *
   41.21 + */
   41.22 +#ifndef _VNET_VIF_H_
   41.23 +#define _VNET_VIF_H_
   41.24 +
   41.25 +#include <if_varp.h>
   41.26 +struct net_device;
   41.27 +
   41.28 +/** Key for entries in the vif table. */
   41.29 +typedef struct VifKey {
   41.30 +    int vnet;
   41.31 +    Vmac vmac;
   41.32 +} VifKey;
   41.33 +
    41.34 +typedef struct Vif { // A vif, stored in vif_table and reference counted.
    41.35 +    int vnet; // vnet id; vnet and vmac come first, in the same order as in VifKey, because vif_add() uses the Vif itself as the table key
    41.36 +    Vmac vmac; // MAC address
    41.37 +    struct net_device *dev; // NOTE(review): never assigned in vif.c - presumably set elsewhere, confirm
    41.38 +    atomic_t refcount; // managed by vif_incref()/vif_decref()
    41.39 +} Vif;
   41.40 +
   41.41 +struct HashTable;
   41.42 +extern struct HashTable *vif_table;
   41.43 +
   41.44 +extern void vif_decref(Vif *vif);
   41.45 +extern void vif_incref(Vif *vif);
   41.46 +
   41.47 +extern int vif_create(int vnet, Vmac *vmac, Vif **vif);
   41.48 +
   41.49 +extern int vif_add(int vnet, Vmac *vmac, Vif **vif);
   41.50 +extern int vif_lookup(int vnet, Vmac *vmac, Vif **vif);
   41.51 +extern int vif_remove(int vnet, Vmac *vmac);
   41.52 +extern int vif_find(int vnet, Vmac *vmac, int create, Vif **vif);
   41.53 +extern void vif_purge(void);
   41.54 +
   41.55 +extern int vif_init(void);
   41.56 +extern void vif_exit(void);
   41.57 +
   41.58 +#endif
    42.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    42.2 +++ b/tools/vnet/vnet-module/vnet.c	Mon Nov 22 16:49:15 2004 +0000
    42.3 @@ -0,0 +1,767 @@
    42.4 +/*
    42.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    42.6 + *
    42.7 + * This program is free software; you can redistribute it and/or modify
    42.8 + * it under the terms of the GNU General Public License as published by the 
    42.9 + * Free Software Foundation; either version 2 of the License, or (at your
   42.10 + * option) any later version.
   42.11 + * 
   42.12 + * This program is distributed in the hope that it will be useful, but
   42.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   42.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   42.15 + * for more details.
   42.16 + *
   42.17 + * You should have received a copy of the GNU General Public License along
   42.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   42.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   42.20 + *
   42.21 + */
   42.22 +
   42.23 +#include <linux/config.h>
   42.24 +#include <linux/module.h>
   42.25 +#include <linux/types.h>
   42.26 +#include <linux/kernel.h>
   42.27 +#include <linux/version.h>
   42.28 +#include <linux/errno.h>
   42.29 +
   42.30 +#include <linux/string.h>
   42.31 +
   42.32 +#include <linux/net.h>
   42.33 +#include <linux/in.h>
   42.34 +#include <linux/inet.h>
   42.35 +#include <linux/netdevice.h>
   42.36 +
   42.37 +#include <linux/etherdevice.h>
   42.38 +#include <net/ip.h>
   42.39 +#include <net/protocol.h>
   42.40 +#include <net/route.h>
   42.41 +#include <linux/skbuff.h>
   42.42 +#include <net/checksum.h>
   42.43 +
   42.44 +#include <tunnel.h>
   42.45 +#include <sa.h>
   42.46 +#include <varp.h>
   42.47 +#include <if_varp.h>
   42.48 +#include <esp.h>
   42.49 +#include <etherip.h>
   42.50 +#include <random.h>
   42.51 +#include <tunnel.h>
   42.52 +
   42.53 +#include <vnet_dev.h>
   42.54 +#include <vnet.h>
   42.55 +#include <vif.h>
   42.56 +#include <vnet_ioctl.h>
   42.57 +#include <sa_algorithm.h>
   42.58 +
   42.59 +#include "allocate.h"
   42.60 +#include "hash_table.h"
   42.61 +#include "sys_net.h"
   42.62 +#include "sys_string.h"
   42.63 +
   42.64 +#define MODULE_NAME "VNET"
   42.65 +#define DEBUG 1
   42.66 +#undef DEBUG
   42.67 +#include "debug.h"
   42.68 +
   42.69 +/** Default vnet security level.
   42.70 + */
   42.71 +int vnet_security_default = SA_AUTH ; //| SA_CONF;
   42.72 +
   42.73 +/** Key for entries in the vnet address table. */
   42.74 +typedef struct VnetAddrKey {
   42.75 +    /** Vnet id. */
   42.76 +    int vnet;
   42.77 +    /** MAC address. */
   42.78 +    unsigned char mac[ETH_ALEN];
   42.79 +} VnetAddrKey;
   42.80 +
   42.81 +/** The physical vnet. */
   42.82 +Vnet *vnet_physical = NULL;
   42.83 +
   42.84 +/** Table of vnets indexed by id. */
   42.85 +static HashTable *vnet_table = NULL;
   42.86 +
   42.87 +/** Decrement reference count, freeing if zero.
   42.88 + *
   42.89 + * @param info vnet (OK if null)
   42.90 + */
    42.91 +void Vnet_decref(Vnet *info){
    42.92 +    if(!info) return;
    42.93 +    if(atomic_dec_and_test(&info->refcount)){ // true only when this was the last reference
    42.94 +        dprintf("> free vnet=%u\n", info->vnet);
    42.95 +        vnet_dev_remove(info); // NOTE(review): presumably tears down the vnet's network device - confirm in vnet_dev.c
    42.96 +        deallocate(info);
    42.97 +    }
    42.98 +}
   42.99 +
  42.100 +/** Increment reference count.
  42.101 + *
  42.102 + * @param info vnet (OK if null)
  42.103 + */
  42.104 +void Vnet_incref(Vnet *info){
  42.105 +    if(!info) return;
  42.106 +    atomic_inc(&info->refcount);
  42.107 +}
  42.108 +
  42.109 +/** Allocate a vnet, setting reference count to 1.
  42.110 + *
  42.111 + * @param info return parameter for vnet
  42.112 + * @return 0 on success, error code otherwise
  42.113 + */
  42.114 +int Vnet_alloc(Vnet **info){
  42.115 +    int err = 0;
  42.116 +    *info = ALLOCATE(Vnet);
  42.117 +    if(*info){
  42.118 +        atomic_set(&(*info)->refcount, 1);
  42.119 +    } else {
  42.120 +        err = -ENOMEM;
  42.121 +    }
  42.122 +    return err;
  42.123 +}
  42.124 +
  42.125 +/** Add a vnet to the table under its vnet id.
  42.126 + *
  42.127 + * @param info vnet to add
  42.128 + * @return 0 on success, error code otherwise
  42.129 + */
   42.130 +int Vnet_add(Vnet *info){
   42.131 +    int err = 0;
   42.132 +    HTEntry *entry = NULL;
   42.133 +    // Vnet_del(info->vnet); //todo: Delete existing vnet info?
   42.134 +    Vnet_incref(info); // reference taken before the vnet is inserted into the table
   42.135 +    entry = HashTable_add(vnet_table, HKEY(info->vnet), info);
   42.136 +    if(!entry){
   42.137 +        err = -ENOMEM;
   42.138 +        Vnet_decref(info); // insert failed: give back the reference taken above
   42.139 +    }
   42.140 +    return err;
   42.141 +}
  42.142 +
  42.143 +/** Remove a vnet from the table.
  42.144 + *
  42.145 + * @param vnet id of vnet to remove
  42.146 + * @return number of vnets removed
  42.147 + */
  42.148 +int Vnet_del(vnetid_t vnet){
  42.149 +    return HashTable_remove(vnet_table, HKEY(vnet));
  42.150 +}
  42.151 +
   42.152 +/** Lookup a vnet by id.
   42.153 + * References the vnet on success - the caller must decref.
   42.154 + *
   42.155 + * @param vnet vnet id
   42.156 + * @param info return parameter for vnet (set to NULL if not found)
   42.157 + * @return 0 on success, -ENOENT if no vnet found
   42.158 + */
   42.159 +int Vnet_lookup(vnetid_t vnet, Vnet **info){
   42.160 +    int err = 0;
   42.161 +    dprintf("> vnet=%u info=%p\n", vnet, info);
   42.162 +    dprintf("> vnet_table=%p\n",vnet_table); 
   42.163 +    *info = HashTable_get(vnet_table, HKEY(vnet));
   42.164 +    if(*info){
   42.165 +        Vnet_incref(*info);
   42.166 +    } else {
   42.167 +        err = -ENOENT;
   42.168 +    }
   42.169 +    dprintf("< err=%d\n", err);
   42.170 +    return err;
   42.171 +}
  42.172 +
  42.173 +/** Free an entry in the vnet table.
  42.174 + *
  42.175 + * @param table containing table
  42.176 + * @param entry to free
  42.177 + */
   42.178 +static void vnet_entry_free_fn(HashTable *table, HTEntry *entry){ // installed as vnet_table->entry_free_fn by vnet_init(); 'table' is not used
   42.179 +    Vnet *info;
   42.180 +    if(!entry) return;
   42.181 +    info = entry->value;
   42.182 +    if(info){
   42.183 +        vnet_dev_remove(info); // NOTE(review): Vnet_decref() below calls vnet_dev_remove() again if this was the last reference - confirm it is safe to call twice
   42.184 +        Vnet_decref(info);
   42.185 +    }
   42.186 +    HTEntry_free(entry);
   42.187 +}
  42.188 +
   42.189 +/** Setup some vnet entries (for testing).
   42.190 + * Creates 5 vnets with ids VNET_VIF, VNET_VIF+1, ... Vnets with an id
   42.191 + * above 10 get the default security level, the others get none.
          + * NOTE(review): the reference from Vnet_alloc() is not dropped here, and
          + * the vnet is not freed if Vnet_create() fails - confirm who owns it.
   42.192 + *
   42.193 + * @return 0 on success, negative error code otherwise
   42.194 + */
   42.195 +static int vnet_setup(void){
   42.196 +    int err = 0;
   42.197 +    int i, n = 5; //20;
   42.198 +    int security = vnet_security_default;
   42.199 +    Vnet *vnet;
   42.200 +
   42.201 +    dprintf(">\n");
   42.202 +    for(i=0; i<n; i++){
   42.203 +        err = Vnet_alloc(&vnet);
   42.204 +        if(err) break;
   42.205 +        vnet->vnet = VNET_VIF + i;
   42.206 +        vnet->security = (vnet->vnet > 10 ? security : 0);
   42.207 +        //err = Vnet_add(vnet);
   42.208 +        err = Vnet_create(vnet);
   42.209 +        if(err) break;
   42.210 +    }
   42.211 +    dprintf("< err=%d\n", err);
   42.212 +    return err;
   42.213 +}
  42.214 +
  42.215 +/** Initialize the vnet table and the physical vnet.
  42.216 + *
  42.217 + * @return 0 on success, error code otherwise
  42.218 + */
   42.219 +int vnet_init(void){
   42.220 +    int err = 0;
   42.221 +
   42.222 +    dprintf(">\n");
   42.223 +    vnet_table = HashTable_new(0);
   42.224 +    dprintf("> vnet_table=%p\n", vnet_table);
   42.225 +    if(!vnet_table){
   42.226 +        err = -ENOMEM;
   42.227 +        goto exit;
   42.228 +    }
   42.229 +    vnet_table->entry_free_fn = vnet_entry_free_fn;
   42.230 +
   42.231 +    err = Vnet_alloc(&vnet_physical); // the physical vnet is kept in the global vnet_physical
   42.232 +    if(err) goto exit;
   42.233 +    vnet_physical->vnet = VNET_PHYS;
   42.234 +    vnet_physical->security = 0;
   42.235 +    err = Vnet_add(vnet_physical);
   42.236 +    if(err) goto exit;
   42.237 +    err = vnet_setup(); // test vnets
   42.238 +    if(err) goto exit;
   42.239 +    err = varp_init();
   42.240 +    if(err) goto exit;
   42.241 +    err = vif_init();
   42.242 +  exit: // note: nothing set up above is undone here when a later step fails
   42.243 +    if(err < 0) wprintf("< err=%d\n", err);
   42.244 +    return err;
   42.245 +}
  42.246 +
   42.247 +void vnet_exit(void){ // Shut down in the reverse order of vnet_init(): vifs, varp, then the vnet table.
   42.248 +    vif_exit();
   42.249 +    varp_exit();
   42.250 +    HashTable_free(vnet_table); // NOTE(review): the vnet_physical reference from Vnet_alloc() is not dropped here - confirm whether that is intended
   42.251 +    vnet_table = NULL;
   42.252 +}
  42.253 +
  42.254 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  42.255 +
   42.256 +static inline int skb_route(struct sk_buff *skb, struct rtable **prt){ // 2.6 variant: find an output route for the skb's IP header
   42.257 +    int err = 0;
   42.258 +    struct flowi fl = { // flow key built from the skb's device and its IP header's daddr/saddr/tos
   42.259 +        .oif = skb->dev->ifindex,
   42.260 +        .nl_u = {
   42.261 +            .ip4_u = {
   42.262 +                .daddr = skb->nh.iph->daddr,
   42.263 +                .saddr = skb->nh.iph->saddr,
   42.264 +                .tos   = skb->nh.iph->tos,
   42.265 +            }
   42.266 +        }
   42.267 +    };
   42.268 +    
   42.269 +    err = ip_route_output_key(prt, &fl); // route is returned through prt
   42.270 +    return err;
   42.271 +}
  42.272 +
  42.273 +#else
  42.274 +
   42.275 +static inline int skb_route(struct sk_buff *skb, struct rtable **prt){ // pre-2.6 variant: same lookup using struct rt_key
   42.276 +    int err = 0;
   42.277 +    struct rt_key key = { };
   42.278 +    key.dst = skb->nh.iph->daddr;
   42.279 +    key.src = skb->nh.iph->saddr;
   42.280 +    key.tos = skb->nh.iph->tos;
   42.281 +    key.oif = skb->dev->ifindex;
   42.282 +    err = ip_route_output_key(prt, &key); // route is returned through prt
   42.283 +    return err;
   42.284 +}
  42.285 +
  42.286 +#endif
  42.287 +
   42.288 +inline int skb_xmit(struct sk_buff *skb){ // Route an IP skb and hand it to the output path; requires skb->dev and skb->nh.iph to be set.
   42.289 +    int err = 0;
   42.290 +    struct rtable *rt = NULL;
   42.291 +
   42.292 +    dprintf("> skb=%p dev=%s\n", skb, skb->dev->name);
   42.293 +
   42.294 +    skb->protocol = htons(ETH_P_IP);
   42.295 +    err = skb_route(skb, &rt);
   42.296 +    if(err) goto exit; // NOTE(review): the skb is not freed on this path - confirm the caller frees it
   42.297 +    skb->dst = &rt->u.dst;
   42.298 +
   42.299 +    ip_select_ident(skb->nh.iph, &rt->u.dst, NULL);
   42.300 +
   42.301 +    if(skb->nh.iph->saddr == 0){ // no source address yet: use the one chosen by the route
   42.302 +        skb->nh.iph->saddr = rt->rt_src;
   42.303 +    }
   42.304 +
   42.305 +    skb->nh.iph->check = 0; // checksum field must be zero while the header checksum is recomputed
   42.306 +    skb->nh.iph->check = ip_compute_csum(skb->nh.raw, (skb->nh.iph->ihl << 2)); // ihl counts 32-bit words, so << 2 gives bytes
   42.307 +
   42.308 +    err = neigh_compat_output(skb);
   42.309 +
   42.310 +  exit:
   42.311 +    dprintf("< err=%d\n", err);
   42.312 +    return err;
   42.313 +}
  42.314 +
   42.315 +/** Called when a vif sends a packet to the network.
   42.316 + * Encapsulates the packet for its vnet and forwards it.
   42.317 + *
   42.318 + * @param skb packet
          + * @param vnet vnet id; VNET_PHYS or 0 sends the packet directly with
          + * skb_xmit(), any other value passes it to varp_output()
   42.319 + * @return 0 on success, error code otherwise
   42.320 + *
   42.321 + * @todo fixme
   42.322 + */
   42.323 +int vnet_skb_send(struct sk_buff *skb, u32 vnet){
   42.324 +    int err = 0;
   42.325 +    Vif *vif = NULL; // never assigned in this function, so the vif_decref() at exit is never reached
   42.326 +
   42.327 +    dprintf("> skb=%p vnet=%u\n", skb, vnet);
   42.328 +    if(vnet == VNET_PHYS || !vnet){
   42.329 +        // For completeness, send direct to the network.
   42.330 +        if(skb->dev){
   42.331 +            err = skb_xmit(skb);
   42.332 +        } else {
   42.333 +            // Can't assume eth0 - might be nbe-br or other. Need to route.
   42.334 +            struct net_device *dev = NULL;
   42.335 +            err = vnet_get_device(DEVICE, &dev);
   42.336 +            if(err) goto exit;
   42.337 +            skb->dev = dev;
   42.338 +            err = skb_xmit(skb);
   42.339 +            dev_put(dev); // release the device obtained from vnet_get_device()
   42.340 +        }
   42.341 +    } else {
   42.342 +        dprintf("> varp_output\n");
   42.343 +        err = varp_output(skb, vnet);
   42.344 +    }
   42.345 +    //dprintf("< err=%d\n", err);
   42.346 +  exit:
   42.347 +    if(vif) vif_decref(vif);
   42.348 +    dprintf("< err=%d\n", err);
   42.349 +    return err;
   42.350 +}
  42.351 +
  42.352 +/** Receive an skb for a vnet.
  42.353 + * If the dest is broadcast, goes to all vifs on the vnet.
  42.354 + * If the dest is unicast, goes to addressed vif on vnet.
  42.355 + * For each vif we set the packet dev and receive the packet.
  42.356 + *
  42.357 + * The packet must have skb->mac.raw set and skb->data must point
  42.358 + * after the device (ethernet) header.
  42.359 + *
  42.360 + * @param skb packet
  42.361 + * @param vnet packet vnet
  42.362 + * @param vmac packet vmac
  42.363 + * @return 0 on success, error code otherwise
  42.364 + */
  42.365 +#if 1
   42.366 +int vnet_skb_recv(struct sk_buff *skb, u32 vnet, Vmac *vmac){ // in this variant vmac is only used for the debug print
   42.367 +    // Receive the skb for a vnet.
   42.368 +    // We make the skb come out of the vif for the vnet, and
   42.369 +    // let ethernet bridging forward it to related interfaces.
   42.370 +    int err = 0;
   42.371 +    Vnet *info = NULL;
   42.372 +
   42.373 +    dprintf("> vnet=%u mac=%s\n", vnet, mac_ntoa(vmac->mac));
   42.374 +    err = Vnet_lookup(vnet, &info); // takes a reference on the vnet, dropped at exit
   42.375 +    if(err) goto exit;
   42.376 +    skb->dev = info->dev;
   42.377 +    dprintf("> netif_rx dev=%s\n", skb->dev->name);
   42.378 +    netif_rx(skb);
   42.379 +  exit:
   42.380 +    if(info) Vnet_decref(info);
   42.381 +    if(err){
   42.382 +      kfree_skb(skb); // the skb is freed here only when the vnet lookup failed
   42.383 +    }
   42.384 +    dprintf("< err=%d\n", err);
   42.385 +    return err;
   42.386 +}
  42.387 +
  42.388 +#else
  42.389 +int vnet_skb_recv(struct sk_buff *skb, u32 vnet, Vmac *vmac){
  42.390 +    int err = 0;
  42.391 +    Vif *vif = NULL;
  42.392 +
  42.393 +    dprintf("> vnet=%u mac=%s\n", vnet, mac_ntoa(vmac->mac));
  42.394 +    if(mac_is_multicast(vmac->mac)){
  42.395 +        HashTable_for_decl(entry);
  42.396 +        int count = 0;
  42.397 +        struct sk_buff *new_skb;
  42.398 +
  42.399 +        HashTable_for_each(entry, vif_table){
  42.400 +            vif = entry->value;
  42.401 +            if(vif->vnet != vnet) continue;
  42.402 +            count++;
  42.403 +            new_skb = skb_copy(skb, GFP_ATOMIC);
  42.404 +            if(!new_skb) break;
  42.405 +            new_skb->dev = vif->dev;
  42.406 +            dprintf("> %d] netif_rx dev=%s\n", count, new_skb->dev->name);
  42.407 +            netif_rx(new_skb);
  42.408 +        }
  42.409 +        kfree_skb(skb);
  42.410 +    } else {
  42.411 +        err = vif_lookup(vnet, vmac, &vif);
  42.412 +        if(err){
  42.413 +            kfree_skb(skb);
  42.414 +            goto exit;
  42.415 +        }
  42.416 +        skb->dev = vif->dev;
  42.417 +        dprintf("> netif_rx dev=%s\n", skb->dev->name);
  42.418 +        netif_rx(skb);
  42.419 +    }
  42.420 +  exit:
  42.421 +    dprintf("< err=%d\n", err);
  42.422 +    return err;
  42.423 +}
  42.424 +#endif
  42.425 +   
  42.426 +/** Check validity of an incoming IP frame.
  42.427 + *
  42.428 + * @param skb frame
  42.429 + * @return 0 if ok, error code otherwise
  42.430 + *
  42.431 + * @todo fixme Can prob skip most of this because linux will have done it.
  42.432 + * @todo Only need the vnet skb context check.
  42.433 + */
   42.434 +int check_ip_frame(struct sk_buff *skb){ // returns 0 if all checks pass, -EINVAL otherwise
   42.435 +    int err = -EINVAL;
   42.436 +    struct iphdr* iph;
   42.437 +    struct net_device *dev;
   42.438 +    __u32  len;
   42.439 +    __u16  check;
   42.440 +
   42.441 +#if 0
   42.442 +    if(skb->context){
   42.443 +        // Todo: After ESP want to skip most checks (including checksum),
   42.444 +        // Todo: but in general may not want to skip all checks on detunnel.
   42.445 +        //dprintf("> Skip check, has context\n");
   42.446 +        err = 0;
   42.447 +        goto exit;
   42.448 +    }
   42.449 +#endif
   42.450 +    // Check we have enough for an ip header - the skb passed should
   42.451 +    // have data pointing at the eth header and skb->len should include
   42.452 +    // that. skb->nh should already have been set. Let the individual
   42.453 +    // protocol handlers worry about the exact ip header len
   42.454 +    // (i.e. whether any ip options are set).
   42.455 +    dev = skb->dev; // assigned but not used below
   42.456 +    
   42.457 +    if(skb->len <  ETH_HLEN + sizeof(struct iphdr)){
   42.458 +        wprintf("> packet too short for ip header\n");
   42.459 +        goto exit;
   42.460 +    }
   42.461 +
   42.462 +    iph = skb->nh.iph;
   42.463 +    /*
   42.464 +     *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
   42.465 +     *
   42.466 +     *	Is the datagram acceptable?
   42.467 +     *
   42.468 +     *	1.	Length at least the size of an ip header
   42.469 +     *	2.	Version of 4
   42.470 +     *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
   42.471 +     *	4.	Doesn't have a bogus length
   42.472 +     */
   42.473 +    if (iph->ihl < 5 || iph->version != 4){
   42.474 +        wprintf("> len and version check failed\n");
   42.475 +        goto exit;
   42.476 +    }
   42.477 +    if(skb->len < ETH_HLEN + (iph->ihl << 2)){
   42.478 +        wprintf("> packet too short for given ihl\n");
   42.479 +        goto exit;
   42.480 +    }
   42.481 +
   42.482 +    check = iph->check;
   42.483 +    //iph->check = 0;
   42.484 +    //iph->check = compute_cksum((__u16 *)iph, (iph->ihl << 1));
   42.485 +    if(iph->check != check){ // NOTE: always false while the recomputation above is commented out, so the checksum is not actually verified
   42.486 +        wprintf("> invalid checksum\n");
   42.487 +        goto exit;
   42.488 +    }
   42.489 +
   42.490 +    len = ntohs(iph->tot_len); 
   42.491 +    if (skb->len < len + ETH_HLEN || len < (iph->ihl << 2)){
   42.492 +        wprintf("> packet too short for tot_len\n");
   42.493 +        goto exit;
   42.494 +    }
   42.495 +    skb->h.raw = skb->nh.raw + (iph->ihl << 2); // point the transport header just past the IP header
   42.496 +    err = 0;
   42.497 +  exit:
   42.498 +    return err;
   42.499 +}
  42.500 +
  42.501 +/** Determine ESP security mode for a new SA.
  42.502 + *
  42.503 + * @param spi incoming spi
  42.504 + * @param protocol incoming protocol
  42.505 + * @param addr source address
  42.506 + * @return security level or negative error code
  42.507 + *
  42.508 + * @todo Need to check spi, and do some lookup for security params.
  42.509 + */
   42.510 +int vnet_sa_security(u32 spi, int protocol, u32 addr){ // spi, protocol and addr are currently ignored
   42.511 +    int security = vnet_security_default; // always the module-wide default
   42.512 +    dprintf("< security=%x\n", security);
   42.513 +    return security;
   42.514 +}
  42.515 +
  42.516 +/** Create a new SA for incoming traffic.
  42.517 + *
  42.518 + * @param spi incoming spi
  42.519 + * @param protocol incoming protocol
  42.520 + * @param addr source address
  42.521 + * @param sa return parameter for SA
  42.522 + * @return 0 on success, error code otherwise
  42.523 + */
  42.524 +int vnet_sa_create(u32 spi, int protocol, u32 addr, SAState **sa){
  42.525 +    int err = 0;
  42.526 +    int security = vnet_sa_security(spi, protocol, addr);
  42.527 +    if(security < 0){
  42.528 +        err = security;
  42.529 +        goto exit;
  42.530 +    }
  42.531 +    err = sa_create(security, spi, protocol, addr, sa);
  42.532 +  exit:
  42.533 +    return err;
  42.534 +}
  42.535 +
   42.536 +/** Check that a context has the correct properties w.r.t. a vnet.
   42.537 + * The context must be secure if the vnet requires security.
          + * If the vnet lookup succeeds, *val is set to the vnet on every path,
          + * including the -EINVAL ones, and carries the reference taken by
          + * Vnet_lookup().
          + * NOTE(review): presumably the caller must Vnet_decref() it even on
          + * error - confirm against callers.
   42.538 + *
   42.539 + * @param vnet vnet id
   42.540 + * @param context context
          + * @param val return parameter for the vnet (NULL if the vnet was not found)
   42.541 + * @return 0 on success, error code otherwise
   42.542 + *
   42.543 + * @todo Need to check that the sa provides the correct security level.
   42.544 + */
   42.545 +int vnet_check_context(int vnet, SkbContext *context, Vnet **val){
   42.546 +    int err = 0;
   42.547 +    Vnet *info = NULL;
   42.548 +    SAState *sa = NULL;
   42.549 +    
   42.550 +    err = Vnet_lookup(vnet, &info);
   42.551 +    if(err){
   42.552 +        wprintf("> No vnet %d\n", vnet);
   42.553 +        goto exit;
   42.554 +    }
   42.555 +    if(!info->security) goto exit; // vnet needs no security: accept with err == 0
   42.556 +    err = -EINVAL;
   42.557 +    if(!context){
   42.558 +        wprintf("> No security context\n");
   42.559 +        goto exit;
   42.560 +    }
   42.561 +    if(context->protocol != IPPROTO_ESP){
   42.562 +        wprintf("> Invalid protocol: wanted %d, got %d\n", IPPROTO_ESP, context->protocol);
   42.563 +        goto exit;
   42.564 +    }
   42.565 +    sa = context->data; // read but not used yet, see the todo below
   42.566 +    //todo: Check security properties of the SA are correct w.r.t. the vnet.
   42.567 +    //Something like  sa->security == info->security;
   42.568 +    err = 0;
   42.569 +  exit:
   42.570 +    *val = info;
   42.571 +    return err;
   42.572 +}
  42.573 +
  42.574 +/** Open function for SA tunnels.
  42.575 + *
  42.576 + * @param tunnel to open
  42.577 + * @return 0 on success, error code otherwise
  42.578 + */
  42.579 +static int sa_tunnel_open(Tunnel *tunnel){
  42.580 +    int err = 0;
  42.581 +    //dprintf(">\n");
  42.582 +    //dprintf("< err=%d\n", err);
  42.583 +    return err;
  42.584 +}
  42.585 +
  42.586 +/** Close function for SA tunnels.
  42.587 + *
  42.588 + * @param tunnel to close (OK if null)
  42.589 + */
   42.590 +static void sa_tunnel_close(Tunnel *tunnel){
   42.591 +    SAState *sa;
   42.592 +    dprintf(">\n");
   42.593 +    if(!tunnel) return;
   42.594 +    sa = tunnel->data; // the SA stored in the tunnel's data field
   42.595 +    if(!sa) return;
   42.596 +    SAState_decref(sa); // drop the tunnel's reference on the SA
   42.597 +    tunnel->data = NULL; // cleared so a later close sees no SA and returns early
   42.598 +    dprintf("<\n");
   42.599 +}
  42.600 +
  42.601 +/** Packet send function for SA tunnels.
  42.602 + *
  42.603 + * @param tunnel to send on
  42.604 + * @param skb packet to send
  42.605 + * @return 0 on success, negative error code on error
  42.606 + */
  42.607 +static int sa_tunnel_send(Tunnel *tunnel, struct sk_buff *skb){
  42.608 +    int err = -EINVAL;
  42.609 +    SAState *sa;
  42.610 +    //dprintf("> tunnel=%p\n", tunnel);
  42.611 +    if(!tunnel){
  42.612 +        wprintf("> Null tunnel!\n");
  42.613 +        goto exit;
  42.614 +    }
  42.615 +    sa = tunnel->data;
  42.616 +    if(!sa){
  42.617 +        wprintf("> Null SA!\n");
  42.618 +        goto exit;
  42.619 +    }
  42.620 +    err = SAState_send(sa, skb, tunnel->base);
  42.621 +  exit:
  42.622 +    //dprintf("< err=%d\n", err);
  42.623 +    return err;
  42.624 +}
  42.625 +
  42.626 +/** Function table (name, open, close, send) used by SA tunnels. */
  42.627 +static TunnelType _sa_tunnel_type = {
  42.628 +    .name	= "SA",
  42.629 +    .open	= sa_tunnel_open,
  42.630 +    .close	= sa_tunnel_close,
  42.631 +    .send 	= sa_tunnel_send
  42.632 +};
  42.633 +
  42.634 +/** Non-static pointer to the SA tunnel type; passed to Tunnel_create() by vnet_tunnel_open(). */
  42.635 +TunnelType *sa_tunnel_type = &_sa_tunnel_type;
  42.636 +
  42.637 +/** Open a tunnel for a vnet to a given address.
  42.638 + * Creates an etherip tunnel, stacked on an SA (ESP) tunnel if the vnet's security flag is set, and adds it with Tunnel_add().
  42.639 + * @param vnet vnet id
  42.640 + * @param addr destination address
  42.641 + * @param tunnel return parameter: the etherip tunnel on success, NULL on error
  42.642 + * @return 0 on success, error code otherwise
  42.643 + */
  42.644 +int vnet_tunnel_open(u32 vnet, u32 addr, Tunnel **tunnel){
  42.645 +    extern TunnelType *etherip_tunnel_type;
  42.646 +    int err = 0;
  42.647 +    Vnet *info = NULL;
  42.648 +    Tunnel *base_tunnel = NULL;
  42.649 +    Tunnel *sa_tunnel = NULL;
  42.650 +    Tunnel *etherip_tunnel = NULL;
  42.651 +
  42.652 +    dprintf("> vnet=%u addr=" IPFMT "\n", vnet, NIPQUAD(addr));
  42.653 +    err = Vnet_lookup(vnet, &info);
  42.654 +    dprintf("> Vnet_lookup=%d\n", err);
  42.655 +    if(err) goto exit;
  42.656 +    if(info->security){
  42.657 +        SAState *sa = NULL;
  42.658 +        dprintf("> security=%d\n", info->security);
  42.659 +        err = Tunnel_create(sa_tunnel_type, vnet, addr, base_tunnel, &sa_tunnel);
  42.660 +        if(err) goto exit;
  42.661 +        dprintf("> sa_tunnel=%p\n", sa_tunnel);
  42.662 +        err = sa_create(info->security, 0, IPPROTO_ESP, addr, &sa);
  42.663 +        if(err) goto exit;
  42.664 +        sa_tunnel->data = sa; /* Released later by sa_tunnel_close(). */
  42.665 +        dprintf("> sa=%p\n", sa);
  42.666 +        base_tunnel = sa_tunnel; /* The etherip tunnel is layered on the SA tunnel. */
  42.667 +    }
  42.668 +    err = Tunnel_create(etherip_tunnel_type, vnet, addr, base_tunnel, &etherip_tunnel);
  42.669 +    if(err) goto exit;
  42.670 +    err = Tunnel_add(etherip_tunnel);
  42.671 +  exit:
  42.672 +    Tunnel_decref(sa_tunnel); /* Drop the local reference; may be NULL here. Presumably the etherip tunnel holds its own reference to its base - TODO confirm. */
  42.673 +    Vnet_decref(info);
  42.674 +    if(err){
  42.675 +        Tunnel_decref(etherip_tunnel); /* Fix: do not leak the created tunnel when Tunnel_add() fails (NULL on earlier failures). */
  42.676 +        etherip_tunnel = NULL;
  42.677 +    }
  42.678 +    *tunnel = etherip_tunnel;
  42.679 +    dprintf("< err=%d\n", err);
  42.680 +    return err;
  42.681 +}
  42.682 +
  42.683 +/** Lookup a tunnel for a vnet to a given address.
  42.684 + * Uses an existing tunnel if there is one, otherwise opens a new one with vnet_tunnel_open().
  42.685 + *
  42.686 + * @param vnet vnet id
  42.687 + * @param addr care-of address
  42.688 + * @param tunnel return parameter (set to NULL by vnet_tunnel_open() on error)
  42.689 + * @return 0 on success, error code otherwise
  42.690 + */
  42.691 +int vnet_tunnel_lookup(u32 vnet, u32 addr, Tunnel **tunnel){
  42.692 +    int err = 0;
  42.693 +    dprintf("> vnet=%d addr=" IPFMT "\n", vnet, NIPQUAD(addr));
  42.694 +    *tunnel = Tunnel_lookup(vnet, addr);
  42.695 +    if(!*tunnel){
  42.696 +        err = vnet_tunnel_open(vnet, addr, tunnel); /* No existing tunnel: create one. */
  42.697 +    }
  42.698 +    dprintf("< err=%d\n", err);
  42.699 +    return err;
  42.700 +}
  42.701 +
  42.702 +/** Send a packet on the appropriate tunnel.
  42.703 + * Finds (or opens) the tunnel with vnet_tunnel_lookup(), sends on it, then drops the tunnel reference.
  42.704 + * @param vnet vnet
  42.705 + * @param addr tunnel endpoint
  42.706 + * @param skb packet
  42.707 + * @return 0 on success, error code otherwise
  42.708 + */
  42.709 +int vnet_tunnel_send(vnetid_t vnet, vnetaddr_t addr, struct sk_buff *skb){
  42.710 +    int err = 0;
  42.711 +    Tunnel *tunnel = NULL;
  42.712 +    dprintf("> vnet=%u addr=" IPFMT "\n", vnet, NIPQUAD(addr));
  42.713 +    err = vnet_tunnel_lookup(vnet, addr, &tunnel);
  42.714 +    if(err) goto exit; /* Nothing to release: no tunnel was returned. */
  42.715 +    err = Tunnel_send(tunnel, skb);
  42.716 +    Tunnel_decref(tunnel); /* Release the reference returned by the lookup. */
  42.717 +  exit:
  42.718 +    dprintf("< err=%d\n", err);
  42.719 +    return err;
  42.720 +}
  42.721 +
  42.722 +static void __exit vnet_module_exit(void){ /* Module teardown: undoes vnet_module_init() in reverse order of initialization. */
  42.723 +    ProcFS_exit();
  42.724 +    sa_table_exit();
  42.725 +    vnet_exit();
  42.726 +    esp_module_exit();
  42.727 +    etherip_module_exit();
  42.728 +    tunnel_module_exit(); /* Fix: was tunnel_module_init(), which re-initialized the tunnel module during unload. */
  42.729 +    random_module_exit();
  42.730 +}
  42.731 +
  42.732 +/** Initialize the vnet module.
  42.733 + * Failure is fatal.
  42.734 + * Sub-modules are initialized in sequence; the first failure skips the remaining steps.
  42.735 + * @return 0 on success, error code otherwise
  42.736 + */
  42.737 +static int __init vnet_module_init(void){
  42.738 +    int err = 0;
  42.739 +
  42.740 +    dprintf(">\n");
  42.741 +    err = random_module_init();
  42.742 +    if(err) wprintf("> random_module_init err=%d\n", err);
  42.743 +    if(err) goto exit;
  42.744 +    err = tunnel_module_init();
  42.745 +    if(err) wprintf("> tunnel_module_init err=%d\n", err);
  42.746 +    if(err) goto exit;
  42.747 +    err = etherip_module_init();
  42.748 +    if(err) wprintf("> etherip_module_init err=%d\n", err);
  42.749 +    if(err) goto exit;
  42.750 +    err = esp_module_init();
  42.751 +    if(err) wprintf("> esp_module_init err=%d\n", err);
  42.752 +    if(err) goto exit;
  42.753 +    err = vnet_init();
  42.754 +    if(err) wprintf("> vnet_init err=%d\n", err);
  42.755 +    if(err) goto exit;
  42.756 +    sa_algorithm_probe_all();
  42.757 +    err = sa_table_init();
  42.758 +    if(err) wprintf("> sa_table_init err=%d\n", err);
  42.759 +    ProcFS_init(); /* Runs even when sa_table_init() failed; there is no goto before it. */
  42.760 +  exit:
  42.761 +    if(err < 0){ /* Only negative codes trigger teardown. */
  42.762 +        vnet_module_exit(); /* Full teardown regardless of how far initialization got. NOTE(review): calls an __exit function from __init code - confirm this is safe for the build configuration. */
  42.763 +    }
  42.764 +    if(err < 0) wprintf("< err=%d\n", err);
  42.765 +    return err;
  42.766 +}
  42.767 +
  42.768 +module_init(vnet_module_init);
  42.769 +module_exit(vnet_module_exit);
  42.770 +MODULE_LICENSE("GPL");
    43.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    43.2 +++ b/tools/vnet/vnet-module/vnet.h	Mon Nov 22 16:49:15 2004 +0000
    43.3 @@ -0,0 +1,88 @@
    43.4 +/*
    43.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    43.6 + *
    43.7 + * This program is free software; you can redistribute it and/or modify
    43.8 + * it under the terms of the GNU General Public License as published by the 
    43.9 + * Free Software Foundation; either version 2 of the License, or (at your
   43.10 + * option) any later version.
   43.11 + * 
   43.12 + * This program is distributed in the hope that it will be useful, but
   43.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   43.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   43.15 + * for more details.
   43.16 + *
   43.17 + * You should have received a copy of the GNU General Public License along
   43.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   43.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   43.20 + *
   43.21 + */
   43.22 +#ifndef __VNET_VNET_H__
   43.23 +#define __VNET_VNET_H__
   43.24 +
   43.25 +#include <asm/atomic.h>
   43.26 +#include <linux/skbuff.h>
   43.27 +
   43.28 +#include <tunnel.h>
   43.29 +#include <skb_context.h>
   43.30 +
   43.31 +struct Vmac;
   43.32 +struct Vif;
   43.33 +struct net_device;
   43.34 +
   43.35 +typedef uint32_t vnetid_t;
   43.36 +typedef uint32_t vnetaddr_t;
   43.37 +
   43.38 +/** Vnet property record. */
   43.39 +typedef struct Vnet {
   43.40 +    /** Reference count. */
   43.41 +    atomic_t refcount;
   43.42 +    /** Vnet id. */
   43.43 +    vnetid_t vnet;
   43.44 +    /** Security flag. If true the vnet requires ESP. */
   43.45 +    int security;
   43.46 +
   43.47 +    struct net_device *dev; /* Net device of the vnet; set by vnet_dev_add(), cleared by vnet_dev_remove(). */
   43.48 +    struct net_device *bridge; /* Bridge device of the vnet; set by vnet_bridge_init() in CONFIG_VNET_BRIDGE builds. */
   43.49 +    
   43.50 +    /** Max size of the header. vnet_dev_add() sets it to sizeof(struct iphdr) + sizeof(struct etheriphdr). */
   43.51 +    int header_n;
   43.52 +    /** Statistics. Returned by the device's get_stats callback. */
   43.53 +    struct net_device_stats stats;
   43.54 +    int recursion; /* Re-entry counter for vnet_dev_hard_start_xmit(): incremented on entry, decremented on exit. */
   43.55 +} Vnet;
   43.57 +extern int Vnet_lookup(vnetid_t id, Vnet **vnet); /* Callers treat a non-zero return as an error and release the result with Vnet_decref(). */
   43.58 +extern int Vnet_add(Vnet *vnet);
   43.59 +extern int Vnet_del(vnetid_t vnet);
   43.60 +extern void Vnet_incref(Vnet *);
   43.61 +extern void Vnet_decref(Vnet *); /* vnet_tunnel_open() may call this with NULL. */
   43.62 +extern int Vnet_alloc(Vnet **vnet);
   43.63 +extern Vnet *vnet_physical;
   43.64 +
   43.65 +extern int skb_xmit(struct sk_buff *skb);
   43.66 +extern int vnet_skb_send(struct sk_buff *skb, u32 vnet); /* Called by the vnet device's hard_start_xmit with the device's vnet id. */
   43.67 +extern int vnet_skb_recv(struct sk_buff *skb, u32 vnet, struct Vmac *vmac);
   43.68 +
   43.69 +extern int vnet_check_context(int vnet, SkbContext *context, Vnet **vinfo);
   43.70 +
   43.71 +extern int vnet_tunnel_open(vnetid_t vnet, vnetaddr_t addr, Tunnel **tunnel); /* Create a new tunnel and add it to the tunnel table. */
   43.72 +extern int vnet_tunnel_lookup(vnetid_t vnet, vnetaddr_t addr, Tunnel **tunnel); /* Find an existing tunnel, or open one. */
   43.73 +extern int vnet_tunnel_send(vnetid_t vnet, vnetaddr_t addr, struct sk_buff *skb); /* Look up the tunnel and send skb on it. */
   43.75 +extern int vnet_init(void);
   43.76 +
   43.77 +enum {
   43.78 +    HANDLE_OK = 1,
   43.79 +    HANDLE_NO = 0,
   43.80 +};
   43.81 +
   43.82 +extern int vnet_sa_security(u32 spi, int protocol, u32 addr);
   43.83 +struct SAState;
   43.84 +extern int vnet_sa_create(u32 spi, int protocol, u32 addr, struct SAState **sa);
   43.85 +
   43.86 +enum {
   43.87 +    VNET_PHYS = 1,
   43.88 +    VNET_VIF = 2,
   43.89 +};
   43.90 +
   43.91 +#endif /* !__VNET_VNET_H__ */
    44.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    44.2 +++ b/tools/vnet/vnet-module/vnet_dev.c	Mon Nov 22 16:49:15 2004 +0000
    44.3 @@ -0,0 +1,534 @@
    44.4 +/*
    44.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    44.6 + *
    44.7 + * This program is free software; you can redistribute it and/or modify
    44.8 + * it under the terms of the GNU General Public License as published by the 
    44.9 + * Free Software Foundation; either version 2 of the License, or (at your
   44.10 + * option) any later version.
   44.11 + * 
   44.12 + * This program is distributed in the hope that it will be useful, but
   44.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   44.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   44.15 + * for more details.
   44.16 + *
   44.17 + * You should have received a copy of the GNU General Public License along
   44.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   44.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   44.20 + *
   44.21 + */
   44.22 +#include <linux/config.h>
   44.23 +#include <linux/module.h>
   44.24 +#include <linux/types.h>
   44.25 +#include <linux/sched.h>
   44.26 +#include <linux/kernel.h>
   44.27 +
   44.28 +#include <linux/skbuff.h>
   44.29 +#include <linux/netdevice.h>
   44.30 +#include <linux/in.h>
   44.31 +#include <linux/tcp.h>
   44.32 +#include <linux/udp.h>
   44.33 +
   44.34 +#include <net/ip.h>
   44.35 +#include <net/protocol.h>
   44.36 +
   44.37 +#include <linux/if_arp.h>
   44.38 +#include <linux/in6.h>
   44.39 +#include <linux/inetdevice.h>
   44.40 +#include <linux/arcdevice.h>
   44.41 +#include <linux/if_bridge.h>
   44.42 +
   44.43 +#include <etherip.h>
   44.44 +#include <vnet.h>
   44.45 +#include <varp.h>
   44.46 +#include <vif.h>
   44.47 +#include <vnet_dev.h>
   44.48 +
   44.49 +#define MODULE_NAME "VNET"
   44.50 +#define DEBUG 1
   44.51 +#undef DEBUG
   44.52 +#include "debug.h"
   44.53 +
   44.54 +#define VNETIF_FMT "vnetif%u"
   44.55 +#define VNETBR_FMT "vnet%u"
   44.56 +
   44.57 +#ifndef CONFIG_BRIDGE
   44.58 +#error Must configure ethernet bridging in Network Options
   44.59 +#endif
   44.60 +
   44.61 +#include <linux/../../net/bridge/br_private.h>
   44.62 +#define dev_bridge(_dev) ((struct net_bridge *)(_dev)->priv)
   44.63 +
   44.64 +static void vnet_dev_destructor(struct net_device *dev){ /* Installed as dev->destructor by vnet_dev_init(): clears the callbacks, flags and priv pointer of the device. */
   44.65 +    dprintf(">\n");
   44.66 +    dev->open                 = NULL;
   44.67 +    dev->stop                 = NULL;
   44.68 +    dev->uninit               = NULL;
   44.69 +    dev->destructor           = NULL;
   44.70 +    dev->hard_start_xmit      = NULL;
   44.71 +    dev->get_stats            = NULL;
   44.72 +    dev->do_ioctl             = NULL;
   44.73 +    dev->change_mtu           = NULL;
   44.74 +
   44.75 +    dev->tx_timeout           = NULL;
   44.76 +    dev->set_multicast_list   = NULL;
   44.77 +    dev->flags                = 0;
   44.78 +
   44.79 +    dev->priv                 = NULL; /* NOTE(review): the net_device itself (kmalloc'd in vnet_dev_add()) is not freed here - confirm where it is released. */
   44.80 +}
   44.81 +
   44.82 +static void vnet_dev_uninit(struct net_device *dev){ /* Installed as dev->uninit; currently only emits debug output. */
   44.83 +    //Vnet *vnet = dev->priv;
   44.84 +    dprintf(">\n");
   44.85 +    //dev_put(dev);
   44.86 +    dprintf("<\n");
   44.87 +}
   44.88 +
   44.89 +static struct net_device_stats *vnet_dev_get_stats(struct net_device *dev){ /* get_stats callback: returns the stats embedded in the Vnet stored in dev->priv. */
   44.90 +    Vnet *vnet = dev->priv;
   44.91 +    //dprintf(">\n");
   44.92 +    return &vnet->stats;
   44.93 +}
   44.94 +
   44.95 +static int vnet_dev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd){ /* do_ioctl callback: ignores its arguments and returns 0. */
   44.96 +    int err = 0;
   44.97 +    
   44.98 +    dprintf(">\n");
   44.99 +    return err;
  44.100 +}
  44.101 +
  44.102 +static int vnet_dev_change_mtu(struct net_device *dev, int mtu){ /* change_mtu callback: returns 0 on success, -EINVAL if mtu is out of range. */
  44.103 +    int err = 0;
  44.104 +    Vnet *vnet = dev->priv;
  44.105 +    if (mtu < 68 || mtu > 1500 - vnet->header_n){ /* Upper bound is 1500 minus the vnet's header_n. */
  44.106 +        err = -EINVAL;
  44.107 +        goto exit;
  44.108 +    }
  44.109 +    dev->mtu = mtu;
  44.110 +  exit:
  44.111 +    return err;
  44.112 +}
  44.113 +
  44.114 +static int vnet_dev_set_name(struct net_device *dev){ /* Sets dev->name to "vnetif<id>" from the Vnet in dev->priv; returns 0, or -ENOMEM if a device with that name already exists. */
  44.115 +    int err = 0;
  44.116 +    Vnet *vnet = (void*)dev->priv;
  44.117 +
  44.118 +    dprintf(">\n");
  44.119 +    dprintf("> vnet=%d\n", vnet->vnet);
  44.120 +    snprintf(dev->name, IFNAMSIZ - 1, VNETIF_FMT, vnet->vnet);
  44.121 +    if(__dev_get_by_name(dev->name)){
  44.122 +        err = -ENOMEM; /* NOTE(review): a name clash is reported as -ENOMEM; -EEXIST would be more descriptive - confirm no caller depends on the value. */
  44.123 +    }
  44.124 +    dprintf("< err=%d\n", err);
  44.125 +    return err;
  44.126 +}
  44.127 +
  44.128 +//============================================================================
  44.129 +#ifdef CONFIG_VNET_BRIDGE
  44.130 +
  44.131 +#define BRIDGE DEVICE
  44.132 +
  44.133 +void vnet_bridge_fini(Vnet *vnet){ /* Deletes the vnet's bridge, if any, and clears vnet->bridge. Safe to call with a null vnet. */
  44.134 +    if(!vnet) return;
  44.135 +    if(vnet->bridge){
  44.136 +        br_del_bridge(vnet->bridge->name); /* NOTE(review): other br_del_bridge() calls in this file are wrapped in rtnl_lock()/rtnl_unlock(); this one is not - confirm. */
  44.137 +        vnet->bridge = NULL;
  44.138 +    }
  44.139 +}
  44.140 +
  44.141 +/** Create the bridge for a vnet, and add the
  44.142 + * vnet interface to it. The bridge is named "vnet<id>"; on error it is deleted again and vnet->bridge is left NULL.
  44.143 + *
  44.144 + * @param vnet vnet
  44.145 + * @return 0 on success, error code otherwise
  44.146 + */
  44.147 +int vnet_bridge_init(Vnet *vnet){
  44.148 +    int err = 0;
  44.149 +    char bridge[IFNAMSIZ] = {};
  44.150 +    struct net_bridge *br;
  44.151 +    vnet->bridge = NULL;
  44.152 +    snprintf(bridge, IFNAMSIZ - 1, VNETBR_FMT, vnet->vnet);
  44.153 +    rtnl_lock();
  44.154 +    err = br_add_bridge(bridge);
  44.155 +    rtnl_unlock();
  44.156 +    if(err){
  44.157 +        dprintf("> Error creating vnet bridge %s: err=%d\n", bridge, err);
  44.158 +        goto exit;
  44.159 +    }
  44.160 +    vnet->bridge = __dev_get_by_name(bridge); /* Fetch the net device of the bridge just created. */
  44.161 +    if(!vnet->bridge){
  44.162 +        wprintf("> Vnet bridge %s is null!\n", bridge);
  44.163 +        err = -EINVAL;
  44.164 +        goto exit;
  44.165 +    }
  44.166 +    br = dev_bridge(vnet->bridge);
  44.167 +    br->stp_enabled = 0; /* Zero the spanning-tree enable flag and the hello/forward-delay timers. */
  44.168 +    br->bridge_hello_time = 0;
  44.169 +    br->hello_time = 0;
  44.170 +    br->bridge_forward_delay = 0;
  44.171 +    br->forward_delay = 0;
  44.172 +    rtnl_lock();
  44.173 +    err = br_add_if(br, vnet->dev);
  44.174 +    rtnl_unlock();
  44.175 +    if(err){
  44.176 +        dprintf("> Error adding vif %s to vnet bridge %s: err=%d\n",
  44.177 +                vnet->dev->name, bridge, err);
  44.178 +        goto exit;
  44.179 +    }
  44.180 +    rtnl_lock();
  44.181 +    dev_open(vnet->dev); /* Return values of dev_open() are not checked. */
  44.182 +    dev_open(vnet->bridge);
  44.183 +    rtnl_unlock();
  44.184 +  exit:
  44.185 +    if(err){
  44.186 +        if(vnet->bridge){ /* Only set once the bridge device was found, so this undoes a successful br_add_bridge(). */
  44.187 +            rtnl_lock();
  44.188 +            br_del_bridge(bridge);
  44.189 +            rtnl_unlock();
  44.190 +            vnet->bridge = NULL;
  44.191 +        }
  44.192 +    }
  44.193 +    return err;
  44.194 +}
  44.195 +
  44.196 +
  44.197 +/** Add an interface to the bridge for a vnet.
  44.198 + * The interface is first removed from the default bridge (if that device is found), then opened and added to the vnet bridge.
  44.199 + * @param vnet vnet
  44.200 + * @param dev interface
  44.201 + * @return 0 on success, -EINVAL if the vnet has no bridge, otherwise the result of br_add_if()
  44.202 + */
  44.203 +int vnet_add_if(Vnet *vnet, struct net_device *dev){
  44.204 +    int err = 0;
  44.205 +    struct net_device *brdev;
  44.206 +
  44.207 +    dprintf(">\n");
  44.208 +    if(!vnet->bridge){
  44.209 +        err = -EINVAL;
  44.210 +        goto exit;
  44.211 +    }
  44.212 +    // Delete the interface from the default bridge.
  44.213 +    // todo: Really want to delete it from any bridge it's in.
  44.214 +    if(!vnet_get_device(BRIDGE, &brdev)){ /* Zero return means the device was found. */
  44.215 +        rtnl_lock();
  44.216 +        br_del_if(dev_bridge(brdev), dev); /* Result ignored. */
  44.217 +        rtnl_unlock(); /* NOTE(review): vnet_dev_mac() calls dev_put() after vnet_get_device(); there is no matching dev_put(brdev) here - confirm. */
  44.218 +    }
  44.219 +    dprintf("> br_add_if %s %s\n", vnet->bridge->name, dev->name);
  44.220 +    rtnl_lock();
  44.221 +    dev_open(dev);
  44.222 +    dev_open(vnet->bridge);
  44.223 +    err = br_add_if(dev_bridge(vnet->bridge), dev);
  44.224 +    rtnl_unlock();
  44.225 +  exit:
  44.226 +    dprintf("< err=%d\n", err);
  44.227 +    return err;
  44.228 +}
  44.229 +
  44.230 +int vnet_del_if(Vnet *vnet, struct net_device *dev){ /* Removes dev from the vnet's bridge; returns -EINVAL if the vnet has no bridge, otherwise 0. */
  44.231 +    int err = 0;
  44.232 +
  44.233 +    dprintf(">\n");
  44.234 +    if(!vnet->bridge){
  44.235 +        err = -EINVAL;
  44.236 +        goto exit;
  44.237 +    }
  44.238 +    rtnl_lock();
  44.239 +    br_del_if(dev_bridge(vnet->bridge), dev); /* Result ignored, so err stays 0. */
  44.240 +    rtnl_unlock();
  44.241 +  exit:
  44.242 +    dprintf("< err=%d\n", err);
  44.243 +    return err;
  44.244 +}
  44.245 +    
  44.246 +
  44.247 +/** Create the bridge and virtual interface for a vnet.
  44.248 + * Steps: vnet_dev_add(), vnet_bridge_init(), Vnet_add(). On any error the bridge is removed with vnet_bridge_fini().
  44.249 + * @param info vnet
  44.250 + * @return 0 on success, error code otherwise
  44.251 + */
  44.252 +int Vnet_create(Vnet *info){
  44.253 +    int err = 0;
  44.254 +
  44.255 +    dprintf("> %u\n", info->vnet);
  44.256 +    err = vnet_dev_add(info);
  44.257 +    if(err) goto exit;
  44.258 +    dprintf("> vnet_bridge_init\n");
  44.259 +    err = vnet_bridge_init(info);
  44.260 +    if(err) goto exit;
  44.261 +    dprintf("> Vnet_add...\n");
  44.262 +    err = Vnet_add(info);
  44.263 +  exit:
  44.264 +    if(err){
  44.265 +        dprintf("> vnet_bridge_fini...\n");
  44.266 +        vnet_bridge_fini(info); /* NOTE(review): the device registered by vnet_dev_add() is not removed on this path - confirm the caller cleans it up. */
  44.267 +    }
  44.268 +    dprintf("< err=%d\n", err);
  44.269 +    return err;
  44.270 +}
  44.271 +    
  44.272 +
  44.273 +
  44.274 +/** Remove the bridge and the net device for a vnet.
  44.275 + * Clears the bridge and dev fields of the vnet.
  44.276 + * Safe to call if the vnet or its dev are null.
  44.277 + *
  44.278 + * @param vnet vnet
  44.279 + */
  44.280 +void vnet_dev_remove(Vnet *vnet){
  44.281 +    if(!vnet) return;
  44.282 +    dprintf("> vnet=%u\n", vnet->vnet);
  44.283 +    if(vnet->bridge){ /* The bridge is deleted before the device is unregistered. */
  44.284 +        dprintf("> br_del_bridge(%s)\n", vnet->bridge->name);
  44.285 +        rtnl_lock();
  44.286 +        br_del_bridge(vnet->bridge->name);
  44.287 +        rtnl_unlock();
  44.288 +        vnet->bridge = NULL;
  44.289 +    }
  44.290 +    if(vnet->dev){
  44.291 +        //dev_put(vnet->dev);
  44.292 +        dprintf("> unregister_netdev(%s)\n", vnet->dev->name);
  44.293 +        unregister_netdev(vnet->dev);
  44.294 +        vnet->dev = NULL;
  44.295 +    }
  44.296 +    dprintf("<\n");
  44.297 +}
  44.298 +
  44.299 +//============================================================================
  44.300 +#else
  44.301 +//============================================================================
  44.302 +
  44.303 +/** Create the virtual interface for a vnet.
  44.304 + * Variant built when CONFIG_VNET_BRIDGE is not defined: adds the device with vnet_dev_add(), then calls Vnet_add().
  44.305 + * @param info vnet
  44.306 + * @return 0 on success, error code otherwise
  44.307 + */
  44.308 +int Vnet_create(Vnet *info){
  44.309 +    int err = 0;
  44.310 +
  44.311 +    dprintf("> %u\n", info->vnet);
  44.312 +    err = vnet_dev_add(info);
  44.313 +    if(err) goto exit;
  44.314 +    dprintf("> Vnet_add...\n");
  44.315 +    err = Vnet_add(info);
  44.316 +  exit:
  44.317 +    dprintf("< err=%d\n", err);
  44.318 +    return err;
  44.319 +}
  44.320 +    
  44.321 +int vnet_add_if(Vnet *vnet, struct net_device *dev){ /* Stub used when CONFIG_VNET_BRIDGE is not defined: always returns -ENOSYS. */
  44.322 +    int err = -ENOSYS;
  44.323 +    return err;
  44.324 +}
  44.325 +
  44.326 +
  44.327 +int vnet_del_if(Vnet *vnet, struct net_device *dev){ /* Stub used when CONFIG_VNET_BRIDGE is not defined: does nothing and returns 0. */
  44.328 +    int err = 0;
  44.329 +    return err;
  44.330 +}
  44.331 +
  44.332 +/** Remove the net device for a vnet (variant without bridge support).
  44.333 + * Unregisters the device and clears the dev field of the vnet.
  44.334 + * Safe to call if the vnet or its dev are null.
  44.335 + *
  44.336 + * @param vnet vnet
  44.337 + */
  44.338 +void vnet_dev_remove(Vnet *vnet){
  44.339 +    if(!vnet) return;
  44.340 +    dprintf("> vnet=%u\n", vnet->vnet);
  44.341 +    if(vnet->dev){
  44.342 +        //dev_put(vnet->dev);
  44.343 +        dprintf("> unregister_netdev(%s)\n", vnet->dev->name);
  44.344 +        unregister_netdev(vnet->dev);
  44.345 +        vnet->dev = NULL;
  44.346 +    }
  44.347 +    dprintf("<\n");
  44.348 +}
  44.349 +#endif
  44.350 +//============================================================================
  44.351 +
  44.352 +static int vnet_dev_open(struct net_device *dev){ /* open callback: starts the device's transmit queue; always returns 0. */
  44.353 +    int err = 0;
  44.354 +    dprintf(">\n");
  44.355 +    netif_start_queue(dev);
  44.356 +    dprintf("<\n");
  44.357 +    return err;
  44.358 +}
  44.359 +
  44.360 +static int vnet_dev_stop(struct net_device *dev){ /* stop callback: stops the device's transmit queue; always returns 0. */
  44.361 +    int err = 0;
  44.362 +    dprintf(">\n");
  44.363 +    netif_stop_queue(dev);
  44.364 +    dprintf("<\n");
  44.365 +    return err;
  44.366 +}
  44.367 +
  44.368 +static int vnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev){ /* hard_start_xmit callback: sends skb on the device's vnet via vnet_skb_send() and updates the tx statistics. Always returns 0. */
  44.369 +    int err = 0;
  44.370 +    Vnet *vnet = dev->priv;
  44.371 +
  44.372 +    dprintf("> skb=%p\n", skb);
  44.373 +    if(vnet->recursion++) { /* Non-zero before the increment means we re-entered while already transmitting on this vnet: drop the packet. */
  44.374 +        vnet->stats.collisions++;
  44.375 +	vnet->stats.tx_errors++;
  44.376 +        wprintf("> recursion!\n");
  44.377 +	dev_kfree_skb(skb); /* NOTE(review): skb is freed here before the NULL check below has run - confirm skb cannot be NULL on this path. */
  44.378 +        goto exit;
  44.379 +    }
  44.380 +    if(!skb){
  44.381 +        err = -EINVAL; /* Not returned to the caller: the function returns 0 regardless. */
  44.382 +        wprintf("> skb NULL!\n");
  44.383 +        goto exit;
  44.384 +    }
  44.385 +    dprintf("> skb->data=%p skb->mac.raw=%p\n", skb->data, skb->mac.raw);
  44.386 +    if(skb->mac.raw < skb->data || skb->mac.raw > skb->nh.raw){ /* MAC header pointer outside [data, network header]: reset it to the start of the data. */
  44.387 +        wprintf("> skb mac duff!\n");
  44.388 +        skb->mac.raw = skb->data;
  44.389 +    }
  44.390 +    //dev->trans_start = jiffies;
  44.391 +    err = vnet_skb_send(skb, vnet->vnet);
  44.392 +    if(err < 0){
  44.393 +        vnet->stats.tx_errors++;
  44.394 +    } else {
  44.395 +        vnet->stats.tx_packets++;
  44.396 +        vnet->stats.tx_bytes += skb->len; /* NOTE(review): skb is read after vnet_skb_send(); if that call consumes or frees the skb this is a use-after-free - confirm ownership. */
  44.397 +    }
  44.398 +  exit:
  44.399 +    vnet->recursion--; /* Balances the increment in the recursion test above on every path. */
  44.400 +    dprintf("<\n");
  44.401 +    return 0;
  44.402 +}
  44.403 +
  44.404 +void vnet_dev_tx_timeout(struct net_device *dev){ /* tx_timeout callback: currently only emits debug output. */
  44.405 +    dprintf(">\n");
  44.406 +    //dev->trans_start = jiffies;
  44.407 +    //netif_wake_queue(dev);
  44.408 +}
  44.409 +
  44.410 +void vnet_dev_set_multicast_list(struct net_device *dev){ /* set_multicast_list callback: currently only emits debug output. */
  44.411 +    dprintf(">\n");
  44.412 +}
  44.413 +
  44.414 +static int (*eth_hard_header)(struct sk_buff *skb, /* Saved hard_header function of an ether_setup() device; filled in by vnet_dev_init(). */
  44.415 +                              struct net_device *dev, unsigned short type,
  44.416 +                              void *daddr, void *saddr, unsigned len) = NULL;
  44.417 +
  44.418 +static int vnet_dev_hard_header(struct sk_buff *skb, /* hard_header callback: calls the saved eth_hard_header, then points skb->mac.raw at skb->data. Returns eth_hard_header's result. */
  44.419 +                                struct net_device *dev, unsigned short type,
  44.420 +                                void *daddr, void *saddr, unsigned len){
  44.421 +    int err = 0;
  44.422 +    dprintf("> skb=%p ethhdr=%p dev=%s len=%u\n",
  44.423 +            skb, skb->mac.raw, dev->name, len);
  44.424 +    if(saddr){
  44.425 +        dprintf("> saddr=" MACFMT "\n", MAC6TUPLE((unsigned char*)saddr));
  44.426 +    } else {
  44.427 +        dprintf("> saddr=NULL\n");
  44.428 +    }
  44.429 +    if(daddr){
  44.430 +        dprintf("> daddr=" MACFMT "\n", MAC6TUPLE((unsigned char*)daddr));
  44.431 +    } else {
  44.432 +        dprintf("> daddr=NULL\n");
  44.433 +    }
  44.434 +    err = eth_hard_header(skb, dev, type, daddr, saddr, len); /* Called unconditionally; vnet_dev_init() sets the pointer in the same place it installs this callback. */
  44.435 +    dprintf("> eth_hard_header=%d\n", err);
  44.436 +    skb->mac.raw = skb->data;
  44.437 +    dprintf("> src=" MACFMT " dst=" MACFMT "\n",
  44.438 +            MAC6TUPLE(skb->mac.ethernet->h_source),
  44.439 +            MAC6TUPLE(skb->mac.ethernet->h_dest));
  44.440 +    dprintf("< err=%d\n", err);
  44.441 +    return err;
  44.442 +}
  44.443 +
  44.444 +void vnet_dev_mac(unsigned char *mac){ /* Fills mac (ETH_ALEN bytes) with the MAC of the DEVICE interface if it can be found, otherwise with a generated address. */
  44.445 +    static unsigned val = 1; /* Counter used for the low four bytes of generated addresses. */
  44.446 +    struct net_device *dev;
  44.447 +
  44.448 +    if(vnet_get_device(DEVICE, &dev)){ /* Non-zero return: device not available, so synthesize AA:FF:<val>. */
  44.449 +        mac[0] = 0xAA;
  44.450 +        mac[1] = 0xFF;
  44.451 +        mac[2] = (unsigned char)((val >> 24) & 0xff);
  44.452 +        mac[3] = (unsigned char)((val >> 16) & 0xff);
  44.453 +        mac[4] = (unsigned char)((val >>  8) & 0xff);
  44.454 +        mac[5] = (unsigned char)((val      ) & 0xff);
  44.455 +        val++;
  44.456 +    } else {
  44.457 +        memcpy(mac, dev->dev_addr, ETH_ALEN);
  44.458 +        dev_put(dev); /* Release the device returned by vnet_get_device(). */
  44.459 +    }
  44.460 +}
  44.461 +
  44.462 +static int vnet_dev_init(struct net_device *dev){ /* Sets up dev as an ethernet device with the vnet callbacks, adjusted header length/MTU, a MAC address and flags. Always returns 0. */
  44.463 +    int err = 0;
  44.464 +    Vnet *vnet = (void*)dev->priv;
  44.465 + 
  44.466 +    dprintf(">\n");
  44.467 +    ether_setup(dev);
  44.468 +
  44.469 +    if(!eth_hard_header) eth_hard_header = dev->hard_header; /* Remember the hard_header installed by ether_setup() (first call only) before overriding it. */
  44.470 +    dev->hard_header          = vnet_dev_hard_header;
  44.471 +
  44.472 +    dev->open                 = vnet_dev_open;
  44.473 +    dev->stop                 = vnet_dev_stop;
  44.474 +    dev->uninit               = vnet_dev_uninit;
  44.475 +    dev->destructor           = vnet_dev_destructor;
  44.476 +    dev->hard_start_xmit      = vnet_dev_hard_start_xmit;
  44.477 +    dev->get_stats            = vnet_dev_get_stats;
  44.478 +    dev->do_ioctl             = vnet_dev_do_ioctl;
  44.479 +    dev->change_mtu           = vnet_dev_change_mtu;
  44.480 +
  44.481 +    dev->tx_timeout           = vnet_dev_tx_timeout;
  44.482 +    dev->watchdog_timeo       = TX_TIMEOUT;
  44.483 +    dev->set_multicast_list   = vnet_dev_set_multicast_list;
  44.484 +    
  44.485 +    dev->hard_header_len      += vnet->header_n; /* Grow the header length and shrink the MTU by the vnet's header_n. */
  44.486 +    dev->mtu                  -= vnet->header_n;
  44.487 +
  44.488 +    vnet_dev_mac(dev->dev_addr);
  44.489 +
  44.490 +    dev->flags |= IFF_DEBUG;
  44.491 +    dev->flags |= IFF_PROMISC;
  44.492 +    dev->flags |= IFF_ALLMULTI;
  44.493 +
  44.494 +    dprintf("<\n");
  44.495 +    return err;
  44.496 +}
  44.497 +
  44.498 +/** Add the interface (net device) for a vnet.
  44.499 + * Sets the dev field of the vnet on success.
  44.500 + * Does nothing if the vnet already has an interface.
  44.501 + *
  44.502 + * @param vnet vnet
  44.503 + * @return 0 on success, error code otherwise
  44.504 + */
  44.505 +int vnet_dev_add(Vnet *vnet){
  44.506 +    int err = 0;
  44.507 +    struct net_device *dev = NULL;
  44.508 +
  44.509 +    dprintf("> vnet=%p\n", vnet);
  44.510 +    if(vnet->dev) goto exit; /* Already has a device: return 0 without changes. */
  44.511 +    vnet->header_n = sizeof(struct iphdr) + sizeof(struct etheriphdr); /* Set before vnet_dev_init(), which reads it. */
  44.512 +    dev = kmalloc(sizeof(struct net_device), GFP_ATOMIC);
  44.513 +    if(!dev){ err = -ENOMEM; goto exit; }
  44.514 +    *dev = (struct net_device){}; /* Zero the whole structure. */
  44.515 +    dev->priv = vnet;
  44.516 +    vnet->dev = dev;
  44.517 +
  44.518 +    err = vnet_dev_set_name(dev);
  44.519 +    if(err) goto exit;
  44.520 +    vnet_dev_init(dev); /* Return value ignored; vnet_dev_init() always returns 0. */
  44.521 +    dprintf("> name=%s, register_netdev...\n", dev->name);
  44.522 +    err = register_netdev(dev);
  44.523 +    dprintf("> register_netdev=%d\n", err);
  44.524 +    if(err) goto exit;
  44.525 +    rtnl_lock();
  44.526 +    dev_open(dev); /* Result of dev_open() is not checked. */
  44.527 +    rtnl_unlock();
  44.528 +
  44.529 +    //dev_hold(dev);
  44.530 +  exit:
  44.531 +    if(err){ /* Error paths occur only before or at a failed register_netdev(), so the device is freed directly. */
  44.532 +        if(dev) kfree(dev);
  44.533 +        vnet->dev = NULL;
  44.534 +    }
  44.535 +    dprintf("< err=%d\n", err);
  44.536 +    return err;
  44.537 +}
    45.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    45.2 +++ b/tools/vnet/vnet-module/vnet_dev.h	Mon Nov 22 16:49:15 2004 +0000
    45.3 @@ -0,0 +1,31 @@
    45.4 +/*
    45.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    45.6 + *
    45.7 + * This program is free software; you can redistribute it and/or modify
    45.8 + * it under the terms of the GNU General Public License as published by the 
    45.9 + * Free Software Foundation; either version 2 of the License, or (at your
   45.10 + * option) any later version.
   45.11 + * 
   45.12 + * This program is distributed in the hope that it will be useful, but
   45.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   45.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   45.15 + * for more details.
   45.16 + *
   45.17 + * You should have received a copy of the GNU General Public License along
   45.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   45.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   45.20 + *
   45.21 + */
   45.22 +#ifndef _VNET_VNET_DEV_H_
   45.23 +#define _VNET_VNET_DEV_H_
   45.24 +
   45.25 +struct Vnet;
   45.26 +struct net_device;
   45.27 +
   45.28 +extern int vnet_dev_add(struct Vnet *vnet); /* Create and register the net device for a vnet. */
   45.29 +extern void vnet_dev_remove(struct Vnet *vnet); /* Unregister the vnet's net device (and delete its bridge in bridge builds). */
   45.30 +extern int Vnet_create(struct Vnet *info); /* Add the device (and bridge in bridge builds) for a vnet, then call Vnet_add(). */
   45.31 +extern int vnet_add_if(struct Vnet *vnet, struct net_device *dev); /* Add dev to the vnet's bridge; returns -ENOSYS in builds without CONFIG_VNET_BRIDGE. */
   45.32 +extern int vnet_del_if(struct Vnet *vnet, struct net_device *dev); /* Remove dev from the vnet's bridge; returns 0 without action in builds without CONFIG_VNET_BRIDGE. */
   45.33 +
   45.34 +#endif
    46.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    46.2 +++ b/tools/vnet/vnet-module/vnet_ioctl.c	Mon Nov 22 16:49:15 2004 +0000
    46.3 @@ -0,0 +1,815 @@
    46.4 +/*
    46.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
    46.6 + *
    46.7 + * This program is free software; you can redistribute it and/or modify
    46.8 + * it under the terms of the GNU General Public License as published by the 
    46.9 + * Free Software Foundation; either version 2 of the License, or (at your
   46.10 + * option) any later version.
   46.11 + * 
   46.12 + * This program is distributed in the hope that it will be useful, but
   46.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   46.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   46.15 + * for more details.
   46.16 + *
   46.17 + * You should have received a copy of the GNU General Public License along
   46.18 + * with this program; if not, write to the Free software Foundation, Inc.,
   46.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
   46.20 + *
   46.21 + */
   46.22 +#include <linux/config.h>
   46.23 +#include <linux/module.h>
   46.24 +
   46.25 +#include <linux/types.h>
   46.26 +#include <linux/kernel.h>
   46.27 +#include <linux/errno.h>
   46.28 +
   46.29 +#include <asm/uaccess.h>
   46.30 +
   46.31 +#include <linux/slab.h>
   46.32 +
   46.33 +#include <linux/proc_fs.h>
   46.34 +#include <linux/string.h>
   46.35 +
   46.36 +#include <linux/net.h>
   46.37 +#include <linux/in.h>
   46.38 +#include <linux/inet.h>
   46.39 +#include <linux/netdevice.h>
   46.40 +
   46.41 +#include <sa.h>
   46.42 +#include "vif.h"
   46.43 +#include "vnet.h"
   46.44 +#include "varp.h"
   46.45 +#include "vnet_dev.h"
   46.46 +
   46.47 +#include "sxpr_parser.h"
   46.48 +#include "iostream.h"
   46.49 +#include "kernel_stream.h"
   46.50 +#include "sys_string.h"
   46.51 +#include "sys_net.h"
   46.52 +
   46.53 +#define MODULE_NAME "VNET"
   46.54 +#define DEBUG 1
   46.55 +#undef DEBUG
   46.56 +#include "debug.h"
   46.57 +
   46.58 +// Functions to manage vnets.
   46.59 +/*
   46.60 +
   46.61 +Have to rely on ethernet bridging being configured - but we can't rely
   46.62 +on the kernel interface being available to us (it's not exported @!$"%!).
   46.63 +
   46.64 +Create a vnet N:
   46.65 +- create the vnet device vnetifN: using commands to /proc, kernel api
   46.66 +- create the vnet bridge vnetN: using brctl in user-space
   46.67 +- for best results something should keep track of the mapping vnet id <-> bridge name
   46.68 +
   46.69 +Add vif device vifD.N to vnet N.
   46.70 +- domain is configured with vifD.N on bridge vnetN
   46.71 +- vif script adds vif to bridge using brctl
   46.72 +- vif script detects that the bridge is a vnet bridge and
   46.73 +  uses /proc commands to configure the mac on the vnet
   46.74 +
   46.75 +Wouldn't be hard to add support for specifying vnet key(s) in
   46.76 +the control interface.
   46.77 +
   46.78 +*/
   46.79 +
   46.80 +    // id         vnet id
   46.81 +    // security   security level
   46.82 +    // ciphersuite: digest, cipher, keys??
   46.83 +/* Security policy.
   46.84 +   vnet
   46.85 +   src: mac
   46.86 +   dst: mac
   46.87 +   coa: ip
   46.88 +   Map vnet x coa -> security (none, auth, conf)
   46.89 +
   46.90 +   Policy, e.g.
   46.91 +   - same subnet x vnet
   46.92 +   - diff subnet x vnet
   46.93 +   - some subnet x vnet
   46.94 +   - some host addr x vnet
   46.95 +
   46.96 +   (security (net local) (vnet *) (mode none))
   46.97 +   (security (net (not local))
   46.98 +
   46.99 +   (security (addr, vnet) (local-subnet addr)       none)
  46.100 +   (security (addr, vnet) (not (local-subnet addr)) conf)
  46.101 +   (security (addr, vnet) (host 15.144.27.80)
  46.102 +   (security (addr, vnet) (subnet addr 15.144.24.0/24) auth)
  46.103 +   (security (addr, vnet) t auth)
  46.104 +
  46.105 +   (security (addr local)         (mode none))
  46.106 +   (security (addr local/16)      (mode none))
  46.107 +   (security (addr 15.144.0.0/16) (mode auth))
  46.108 +   (security (addr 15.0.0.0/8)    (mode conf))
  46.109 +   (security (addr *)             (mode drop))
  46.110 +
  46.111 +   ?Varp security
  46.112 +   Use esp too - none, auth, conf,
  46.113 +   Varp sends broadcasts (requests) and unicasts (replies).
  46.114 +   Uses UDP. Could send over ESP if needed.
  46.115 +   For bcast don't know where it goes, so security has to be by vnet.
  46.116 +   For ucast know where it goes, so could do by vnet and addr.
  46.117 +
  46.118 +   Similar issue for vnets: know where unicast goes but don't know where
  46.119 +   bcast goes.
  46.120 +
  46.121 +   Simplify: 2 levels
  46.122 +   local ucast
  46.123 +   nonlocal ucast, mcast
  46.124 +
  46.125 +   (security (local none) (nonlocal conf))
  46.126 +   (security (local auth) (nonlocal conf))
  46.127 +
  46.128 +   VARP security matches vnet security.
  46.129 +
  46.130 + */
  46.131 +
  46.132 +/** @file
  46.133 + *
  46.134 + * Kernel interface to files in /proc.
  46.135 + */
  46.136 +
  46.137 +#define PROC_ROOT "/proc/"
  46.138 +#define PROC_ROOT_LEN 6
  46.139 +#define MODULE_ROOT PROC_ROOT "vnet"
  46.140 +
  46.141 +enum {
  46.142 +    VNET_POLICY = 1,
  46.143 +};
  46.144 +
  46.145 +typedef struct proc_dir_entry ProcEntry;
  46.146 +typedef struct inode Inode;
  46.147 +typedef struct file File;
  46.148 +
  46.149 +static int proc_open_fn(struct inode *inode, File *file);
  46.150 +static ssize_t proc_read_fn(File *file, char *buffer, size_t count, loff_t *offset);
  46.151 +static ssize_t proc_write_fn(File *file, const char *buffer, size_t count, loff_t *offset) ;
  46.152 +//static int proc_flush_fn(File *file);
  46.153 +static loff_t proc_lseek_fn(File * file, loff_t offset, int orig);
  46.154 +static int proc_ioctl_fn(struct inode *inode, File *file, unsigned opcode, unsigned long arg);
  46.155 +static int proc_release_fn(struct inode *inode, File *file);
  46.156 +
  46.157 +static int eval(Sxpr exp);
  46.158 +
  46.159 +static int ProcEntry_has_name(ProcEntry *entry, const char *name, int namelen){
  46.160 +    dprintf("> name=%.*s entry=%.*s\n", namelen, name, entry->namelen, entry->name);
  46.161 +    if(!entry || !entry->low_ino) return FALSE;
  46.162 +    if(entry->namelen != namelen) return FALSE;
  46.163 +    return memcmp(name, entry->name, namelen) == 0;
  46.164 +}
  46.165 +
  46.166 +// Set f->f_error on error?
  46.167 +// Does interface stop r/w on first error?
  46.168 +// Is release called after an error?
  46.169 +//
  46.170 +
  46.171 +static struct file_operations proc_file_ops = {
  46.172 +    //owner:   THIS_MODULE,
  46.173 +    open:    proc_open_fn,
  46.174 +    read:    proc_read_fn,
  46.175 +    write:   proc_write_fn,
  46.176 +    //flush:   proc_flush_fn,
  46.177 +    llseek:  proc_lseek_fn,
  46.178 +    ioctl:   proc_ioctl_fn,
  46.179 +    release: proc_release_fn,
  46.180 +};
  46.181 +
  46.182 +static int proc_get_parser(File *file, Parser **val){
  46.183 +    int err = 0;
  46.184 +    Parser *parser = NULL;
  46.185 +    parser = file->private_data;
  46.186 +    if(!parser){
  46.187 +        parser = Parser_new();
  46.188 +        if(!parser){
  46.189 +            err = -ENOMEM;
  46.190 +            goto exit;
  46.191 +        }
  46.192 +        file->private_data = parser;
  46.193 +    }
  46.194 +  exit:
  46.195 +    *val = parser;
  46.196 +    return err;
  46.197 +}
  46.198 +
  46.199 +static int proc_open_fn(Inode *inode, File *file){
  46.200 +    // User open.
  46.201 +    // Return errcode or 0 on success.
  46.202 +    // Can stuff data in file->private_data (void*).
  46.203 +    // Get entry from
  46.204 +    //ProcEntry *entry = (ProcEntry *)inode->u.generic_ip;
  46.205 +    //file->private_data = NULL;
  46.206 +    // Check for user privilege - deny otherwise.
  46.207 +    // -EACCESS
  46.208 +    int err = 0;
  46.209 +    dprintf(">\n");
  46.210 +    file->private_data = NULL;
  46.211 +    return err;
  46.212 +}
  46.213 +
  46.214 +static ssize_t proc_read_fn(File *file, char *buffer,
  46.215 +                            size_t count, loff_t *offset){
  46.216 +    // User read.
  46.217 +    // Copy data to user buffer, increment offset by count, return count.
  46.218 +    dprintf(">\n");
  46.219 +    count = 0;
  46.220 +    //if(copy_to_user(buffer, data, count)){
  46.221 +    //    return -EFAULT;
  46.222 +    //}
  46.223 +    //*offset += count;
  46.224 +    return count;
  46.225 +}
  46.226 +
  46.227 +static ssize_t proc_write_fn(File *file, const char *buffer,
  46.228 +                             size_t count, loff_t *offset) {
  46.229 +    // User write.
  46.230 +    // Copy data into kernel space from buffer.
  46.231 +    // Increment offset by count, return count (or code).
  46.232 +    int err = 0;
  46.233 +    char *data = NULL;
  46.234 +    Parser *parser = NULL;
  46.235 +
  46.236 +    //dprintf("> count=%d\n", count);
  46.237 +    err = proc_get_parser(file, &parser);
  46.238 +    if(err) goto exit;
  46.239 +    data = allocate(count);
  46.240 +    if(!data){
  46.241 +        err = -ENOMEM;
  46.242 +        goto exit;
  46.243 +    }
  46.244 +    err = copy_from_user(data, buffer, count);
  46.245 +    if(err) goto exit;
  46.246 +    *offset += count;
  46.247 +    err = Parser_input(parser, data, count);
  46.248 +  exit:
  46.249 +    deallocate(data);
  46.250 +    err = (err < 0 ? err : count);
  46.251 +    //dprintf("< err = %d\n", err);
  46.252 +    return err;
  46.253 +}
  46.254 +
  46.255 +#if 0
  46.256 +static int proc_flush_fn(File *file){
  46.257 +    // User flush.
  46.258 +    int writing = (file->f_flags & O_ACCMODE) == O_WRONLY;
  46.259 +    int f_count = atomic_read(&file->f_count);
  46.260 +    if (writing && f_count == 1) {
  46.261 +        ProcEntry *pentry = (ProcEntry *)file->f_dentry->d_inode->u.generic_ip;
  46.262 +        // ...
  46.263 +    }
  46.264 +  return retval;
  46.265 +}
  46.266 +#endif
  46.267 +
  46.268 +#ifndef SEEK_SET
  46.269 +enum {
  46.270 +    /** Offset from start. */
  46.271 +    SEEK_SET = 0,
  46.272 +    /** Offset from current position. */
  46.273 +    SEEK_CUR = 1,
  46.274 +    /** Offset from size of file. */
  46.275 +    SEEK_END = 2
  46.276 +};
  46.277 +#endif /* !SEEK_SET */
  46.278 +
  46.279 +static loff_t proc_lseek_fn(File * file, loff_t offset, int from){
  46.280 +    // User lseek.
  46.281 +    dprintf(">\n");
  46.282 +    switch(from){
  46.283 +    case SEEK_SET:
  46.284 +        break;
  46.285 +    case SEEK_CUR:
  46.286 +	offset += file->f_pos;
  46.287 +        break;
  46.288 +    case SEEK_END:
  46.289 +	return -EINVAL;
  46.290 +    default:
  46.291 +	return -EINVAL;
  46.292 +    }
  46.293 +    if(offset < 0) return -EINVAL;    
  46.294 +    file->f_pos = offset;
  46.295 +    return offset;
  46.296 +}
  46.297 +
  46.298 +static int proc_ioctl_fn(Inode *inode, File *file,
  46.299 +                         unsigned opcode, unsigned long arg){
  46.300 +    // User ioctl.
  46.301 +    dprintf(">\n");
  46.302 +    return 0;
  46.303 +}
  46.304 +
  46.305 +static int proc_release_fn(Inode *inode, File *file){
  46.306 +    // User close.
  46.307 +    // Cleanup file->private_data, return errcode.
  46.308 +    int err = 0;
  46.309 +    Parser *parser = NULL;
  46.310 +    Sxpr obj, l;
  46.311 +
  46.312 +    dprintf(">\n");
  46.313 +    err = proc_get_parser(file, &parser);
  46.314 +    if(err) goto exit;
  46.315 +    err = Parser_input(parser, NULL, 0);
  46.316 +    if(err) goto exit;
  46.317 +    obj = parser->val;
  46.318 +    objprint(iostdout, obj, 0); IOStream_print(iostdout, "\n");
  46.319 +    for(l = obj; CONSP(l); l = CDR(l)){
  46.320 +        err = eval(CAR(l));
  46.321 +        if(err) break;
  46.322 +    }
  46.323 +  exit:
  46.324 +    Parser_free(parser);
  46.325 +    file->private_data = NULL;
  46.326 +    dprintf("< err=%d\n", err);
  46.327 +    return err;
  46.328 +}
  46.329 +
  46.330 +static ProcEntry *proc_fs_root = &proc_root;
  46.331 +
  46.332 +static int proc_path_init(const char *path, const char **rest){
  46.333 +    int err = 0;
  46.334 +
  46.335 +    if(!path){
  46.336 +        err = -EINVAL;
  46.337 +        goto exit;
  46.338 +    }
  46.339 +    if(*path == '/'){
  46.340 +        if(strncmp(PROC_ROOT, path, PROC_ROOT_LEN)){
  46.341 +            err = -EINVAL;
  46.342 +        } else {
  46.343 +            path += PROC_ROOT_LEN;
  46.344 +        }
  46.345 +    }
  46.346 +  exit:
  46.347 +    *rest = path;
  46.348 +    return err;
  46.349 +}
  46.350 +
  46.351 +
  46.352 +/** Parse a path relative to `dir'. If dir is null or the proc root
  46.353 + * the path is relative to "/proc/", and the leading "/proc/" may be
  46.354 + * supplied.
  46.355 + *
  46.356 + */
  46.357 +static ProcEntry * ProcFS_lookup(const char *path, ProcEntry *dir){
  46.358 +    const char *pathptr = path, *next = NULL;
  46.359 +    ProcEntry *entry, *result = NULL;
  46.360 +    int pathlen;
  46.361 +
  46.362 +    if(dir && (dir != proc_fs_root)){
  46.363 +        entry = dir;
  46.364 +    } else {
  46.365 +        if(proc_path_init(path, &pathptr)) goto exit;
  46.366 +        entry = proc_fs_root;
  46.367 +    }
  46.368 +    if(!pathptr || !*pathptr) goto exit;
  46.369 +    while(1){
  46.370 +        next = strchr(pathptr, '/');
  46.371 +        pathlen = (next ? next - pathptr : strlen(pathptr));
  46.372 +        for(entry = entry->subdir; entry ; entry = entry->next) {
  46.373 +            if(ProcEntry_has_name(entry, pathptr, pathlen)) break;
  46.374 +        }
  46.375 +        if (!entry) break;
  46.376 +        if(!next){
  46.377 +            result = entry;
  46.378 +            break;
  46.379 +        }
  46.380 +        pathptr = next + 1;
  46.381 +    }
  46.382 +  exit:
  46.383 +    return result;
  46.384 +}
  46.385 +
  46.386 +static ProcEntry *ProcFS_register(const char *name, ProcEntry *dir, int val){
  46.387 +    mode_t mode = 0;
  46.388 +    ProcEntry *entry;
  46.389 +
  46.390 +    entry = create_proc_entry(name, mode, dir);
  46.391 +    if(entry){
  46.392 +        entry->proc_fops = &proc_file_ops;
  46.393 +        entry->data = (void*)val; // Whatever data we need.
  46.394 +    }
  46.395 +    return entry;
  46.396 +}
  46.397 +
  46.398 +static ProcEntry *ProcFS_mkdir(const char *name, ProcEntry *parent){
  46.399 +    ProcEntry *entry = NULL;
  46.400 +    entry = ProcFS_lookup(name, parent);
  46.401 +    if(!entry){
  46.402 +        const char *path;
  46.403 +        if(proc_path_init(name, &path)) goto exit;
  46.404 +        entry = proc_mkdir(path, parent);
  46.405 +    }
  46.406 +  exit:
  46.407 +    return entry;
  46.408 +}
  46.409 +
  46.410 +static void ProcFS_remove(const char *name, ProcEntry *parent){
  46.411 +    remove_proc_entry(name, parent);
  46.412 +}
  46.413 +
  46.414 +static void ProcFS_rmrec_entry(ProcEntry *entry){
  46.415 +    if(entry){
  46.416 +        // Don't want to remove /proc itself!
  46.417 +        if(entry->parent == entry) return;
  46.418 +        while(entry->subdir){
  46.419 +            ProcFS_rmrec_entry(entry->subdir);
  46.420 +        }
  46.421 +        dprintf("> remove %s\n", entry->name);
  46.422 +        ProcFS_remove(entry->name, entry->parent);
  46.423 +    }
  46.424 +}
  46.425 +
  46.426 +static void ProcFS_rmrec(const char *name, ProcEntry *parent){
  46.427 +    ProcEntry *entry;
  46.428 +
  46.429 +    dprintf("> name=%s\n", name);
  46.430 +    entry = ProcFS_lookup(name, parent);
  46.431 +    if(entry){
  46.432 +        ProcFS_rmrec_entry(entry);
  46.433 +    }
  46.434 +    dprintf("<\n");
  46.435 +}
  46.436 +
  46.437 +static int stringof(Sxpr exp, char **s){
  46.438 +    int err = 0;
  46.439 +    if(ATOMP(exp)){
  46.440 +        *s = atom_name(exp);
  46.441 +    } else if(STRINGP(exp)){
  46.442 +        *s = string_string(exp);
  46.443 +    } else {
  46.444 +        err = -EINVAL;
  46.445 +        *s = NULL;
  46.446 +    }
  46.447 +    return err;
  46.448 +}
  46.449 +
  46.450 +static int child_string(Sxpr exp, Sxpr key, char **s){
  46.451 +    int err = 0;
  46.452 +    Sxpr val = sxpr_child_value(exp, key, ONONE);
  46.453 +    err = stringof(val, s);
  46.454 +    return err;
  46.455 +}
  46.456 +
  46.457 +static int intof(Sxpr exp, int *v){
  46.458 +    int err = 0;
  46.459 +    char *s;
  46.460 +    unsigned long l;
  46.461 +    if(INTP(exp)){
  46.462 +        *v = OBJ_INT(exp);
  46.463 +    } else {
  46.464 +        err = stringof(exp, &s);
  46.465 +        if(err) goto exit;
  46.466 +        err = convert_atoul(s, &l);
  46.467 +        *v = (int)l;
  46.468 +    }
  46.469 + exit:
  46.470 +    return err;
  46.471 +}
  46.472 +
  46.473 +static int child_int(Sxpr exp, Sxpr key, int *v){
  46.474 +    int err = 0;
  46.475 +    Sxpr val = sxpr_child_value(exp, key, ONONE);
  46.476 +    err = intof(val, v);
  46.477 +    return err;
  46.478 +}
  46.479 +
  46.480 +static int macof(Sxpr exp, unsigned char *v){
  46.481 +    int err = 0;
  46.482 +    char *s;
  46.483 +    err = stringof(exp, &s);
  46.484 +    if(err) goto exit;
  46.485 +    err = mac_aton(s, v);
  46.486 +  exit:
  46.487 +    return err;
  46.488 +}
  46.489 +
  46.490 +static int child_mac(Sxpr exp, Sxpr key, unsigned char *v){
  46.491 +    int err = 0;
  46.492 +    Sxpr val = sxpr_child_value(exp, key, ONONE);
  46.493 +    err = macof(val, v);
  46.494 +    return err;
  46.495 +}
  46.496 +
  46.497 +static int addrof(Sxpr exp, uint32_t *v){
  46.498 +    int err = 0;
  46.499 +    char *s;
  46.500 +    unsigned long w;
  46.501 +    err = stringof(exp, &s);
  46.502 +    if(err) goto exit;
  46.503 +    err = get_inet_addr(s, &w);
  46.504 +    if(err) goto exit;
  46.505 +    *v = (uint32_t)w;
  46.506 +  exit:
  46.507 +    return err;
  46.508 +}
  46.509 +
  46.510 +static int child_addr(Sxpr exp, Sxpr key, uint32_t *v){
  46.511 +    int err = 0;
  46.512 +    Sxpr val = sxpr_child_value(exp, key, ONONE);
  46.513 +    err = addrof(val, v);
  46.514 +    return err;
  46.515 +}
  46.516 +
  46.517 +/** Create a vnet.
  46.518 + * It is an error if a vnet with the same id exists.
  46.519 + *
  46.520 + * @param vnet vnet id
  46.521 + * @param security security level
  46.522 + * @return 0 on success, error code otherwise
  46.523 + */
  46.524 +static int ctrl_vnet_add(int vnet, int security){
  46.525 +    int err = 0;
  46.526 +    Vnet *vnetinfo = NULL;
  46.527 +    if(Vnet_lookup(vnet, &vnetinfo) == 0){
  46.528 +        err = -EEXIST;
  46.529 +        goto exit;
  46.530 +    }
  46.531 +    err = Vnet_alloc(&vnetinfo);
  46.532 +    if(err) goto exit;
  46.533 +    vnetinfo->vnet = vnet;
  46.534 +    vnetinfo->security = security;
  46.535 +    err = Vnet_create(vnetinfo);
  46.536 +  exit:
  46.537 +    if(vnetinfo) Vnet_decref(vnetinfo);
  46.538 +    return err;
  46.539 +}
  46.540 +
  46.541 +/** Delete a vnet.
  46.542 + *
  46.543 + * @param vnet vnet id
  46.544 + * @return 0 on success, error code otherwise
  46.545 + */
  46.546 +static int ctrl_vnet_del(int vnet){
  46.547 +    int err = -ENOSYS;
  46.548 +    // Can't delete if there are any vifs on the vnet.
  46.549 +    //Vnet_del(vnet);
  46.550 +    return err;
  46.551 +}
  46.552 +
  46.553 +/** Create an entry for a vif with the given vnet and vmac.
  46.554 + *
  46.555 + * @param vnet vnet id
  46.556 + * @param vmac mac address
  46.557 + * @return 0 on success, error code otherwise
  46.558 + */
  46.559 +static int ctrl_vif_add(int vnet, Vmac *vmac){
  46.560 +    int err = 0;
  46.561 +    Vnet *vnetinfo = NULL;
  46.562 +    Vif *vif = NULL;
  46.563 +
  46.564 +    dprintf(">\n");
  46.565 +    err = Vnet_lookup(vnet, &vnetinfo);
  46.566 +    if(err) goto exit;
  46.567 +    err = vif_add(vnet, vmac, &vif);
  46.568 +  exit:
  46.569 +    if(vnetinfo) Vnet_decref(vnetinfo);
  46.570 +    if(vif) vif_decref(vif);
  46.571 +    dprintf("< err=%d\n", err);
  46.572 +    return err;
  46.573 +}
  46.574 +
  46.575 +/** Add net device 'vifname' to the bridge for 'vnet' and
  46.576 + * create an entry for a vif with the given vnet and vmac.
  46.577 + * This is used when device 'vifname' is a virtual device
  46.578 + * connected to a vif in a vm.
  46.579 + *
  46.580 + * @param vifname name of device to bridge
  46.581 + * @param vnet vnet id
  46.582 + * @param vmac mac address
  46.583 + * @return 0 on success, error code otherwise
  46.584 + */
  46.585 +static int ctrl_vif_conn(char *vifname, int vnet, Vmac *vmac){
  46.586 +    int err = 0;
  46.587 +    Vnet *vnetinfo = NULL;
  46.588 +    struct net_device *vifdev = NULL;
  46.589 +    Vif *vif = NULL;
  46.590 +
  46.591 +    dprintf("> %s\n", vifname);
  46.592 +    err = Vnet_lookup(vnet, &vnetinfo);
  46.593 +    if(err) goto exit;
  46.594 +    err = vif_add(vnet, vmac, &vif);
  46.595 +    if(err) goto exit;
  46.596 +    err = vnet_get_device(vifname, &vifdev);
  46.597 +    if(err) goto exit;
  46.598 +    vif->dev = vifdev;
  46.599 +    err = vnet_add_if(vnetinfo, vifdev);
  46.600 +  exit:
  46.601 +    if(vnetinfo) Vnet_decref(vnetinfo);
  46.602 +    if(vif) vif_decref(vif);
  46.603 +    if(vifdev) dev_put(vifdev);
  46.604 +    dprintf("< err=%d\n", err);
  46.605 +    return err;
  46.606 +}
  46.607 +
  46.608 +/** Delete a vif.
  46.609 + *
  46.610 + * @param vnet vnet id
  46.611 + * @param vmac mac address
  46.612 + * @return 0 on success, error code otherwise
  46.613 + */
  46.614 +static int ctrl_vif_del(int vnet, Vmac *vmac){
  46.615 +    int err = 0;
  46.616 +    Vnet *vnetinfo = NULL;
  46.617 +    Vif *vif = NULL;
  46.618 +
  46.619 +    dprintf(">\n");
  46.620 +    err = Vnet_lookup(vnet, &vnetinfo);
  46.621 +    if(err) goto exit;
  46.622 +    err = vif_lookup(vnet, vmac, &vif);
  46.623 +    if(err) goto exit;
  46.624 +    if(vif->dev){
  46.625 +        vnet_del_if(vnetinfo, vif->dev);
  46.626 +        vif->dev = NULL;
  46.627 +    }
  46.628 +    vif_remove(vnet, vmac);
  46.629 +  exit:
  46.630 +    if(vnetinfo) Vnet_decref(vnetinfo);
  46.631 +    if(vif) vif_decref(vif);
  46.632 +    dprintf("< err=%d\n", err);
  46.633 +    return err;
  46.634 +}
  46.635 +
  46.636 +/** (varp.print)
  46.637 + */
  46.638 +static int eval_varp_print(Sxpr exp){
  46.639 +    int err = 0;
  46.640 +    varp_print();
  46.641 +    return err;
  46.642 +}
  46.643 +
  46.644 +/** (varp.mcaddr (addr <addr>))
  46.645 + */
  46.646 +static int eval_varp_mcaddr(Sxpr exp){
  46.647 +    int err =0;
  46.648 +    Sxpr oaddr = intern("addr");
  46.649 +    uint32_t addr;
  46.650 +
  46.651 +    err = child_addr(exp, oaddr, &addr);
  46.652 +    if(err < 0) goto exit;
  46.653 +    varp_set_mcast_addr(addr);
  46.654 +  exit:
  46.655 +    return err;
  46.656 +}
  46.657 +
  46.658 +/** (vnet.add (id <id>) [(security { none | auth | conf } )] )
  46.659 + */
  46.660 +static int eval_vnet_add(Sxpr exp){
  46.661 +    int err = 0;
  46.662 +    Sxpr oid = intern("id");
  46.663 +    Sxpr osecurity = intern("security");
  46.664 +    Sxpr csecurity;
  46.665 +    int id;
  46.666 +    char *security;
  46.667 +    int sec;
  46.668 +    err = child_int(exp, oid, &id);
  46.669 +    if(err) goto exit;
  46.670 +    if(id < VNET_VIF){ 
  46.671 +        err = -EINVAL;
  46.672 +        goto exit;
  46.673 +    }
  46.674 +    csecurity = sxpr_child_value(exp, osecurity, intern("none"));
  46.675 +    err = stringof(csecurity, &security);
  46.676 +    if(err) goto exit;
  46.677 +    if(strcmp(security, "none")==0){
  46.678 +        sec = 0;
  46.679 +    } else if(strcmp(security, "auth")==0){
  46.680 +        sec = SA_AUTH;
  46.681 +    } else if(strcmp(security, "conf")==0){
  46.682 +        sec = SA_CONF;
  46.683 +    } else {
  46.684 +        err = -EINVAL;
  46.685 +        goto exit;
  46.686 +    }
  46.687 +    dprintf("> vnet id=%d\n", id);
  46.688 +    err = ctrl_vnet_add(id, sec);
  46.689 + exit:
  46.690 +    dprintf("< err=%d\n", err);
  46.691 +    return err;
  46.692 +}
  46.693 +
  46.694 +/** Delete a vnet.
  46.695 + *
  46.696 + * (vnet.del (id <id>))
  46.697 + *
  46.698 + * @param vnet vnet id
  46.699 + * @return 0 on success, error code otherwise
  46.700 + */
  46.701 +static int eval_vnet_del(Sxpr exp){
  46.702 +    int err = 0;
  46.703 +    Sxpr oid = intern("id");
  46.704 +    int id;
  46.705 +
  46.706 +    err = child_int(exp, oid, &id);
  46.707 +    if(err) goto exit;
  46.708 +    err = ctrl_vnet_del(id);
  46.709 +  exit:
  46.710 +    return err;
  46.711 +}
  46.712 +
  46.713 +/** (vif.add (vnet <vnet>) (vmac <macaddr>))
  46.714 + */
  46.715 +static int eval_vif_add(Sxpr exp){
  46.716 +    int err = 0;
  46.717 +    Sxpr ovnet = intern("vnet");
  46.718 +    Sxpr ovmac = intern("vmac");
  46.719 +    int vnet;
  46.720 +    Vmac vmac = {};
  46.721 +
  46.722 +    err = child_int(exp, ovnet, &vnet);
  46.723 +    if(err) goto exit;
  46.724 +    err = child_mac(exp, ovmac, vmac.mac);
  46.725 +    if(err) goto exit;
  46.726 +    err = ctrl_vif_add(vnet, &vmac);
  46.727 +  exit:
  46.728 +    return err;
  46.729 +}
  46.730 +
  46.731 +/** (vif.conn (vif <name>) (vnet <id>) (vmac <mac>))
  46.732 + */
  46.733 +static int eval_vif_conn(Sxpr exp){
  46.734 +    int err = 0;
  46.735 +    Sxpr ovif = intern("vif");
  46.736 +    Sxpr ovnet = intern("vnet");
  46.737 +    Sxpr ovmac = intern("vmac");
  46.738 +    char *vif = NULL;
  46.739 +    int vnet = 0;
  46.740 +    Vmac vmac = {};
  46.741 +
  46.742 +    err = child_string(exp, ovif, &vif);
  46.743 +    if(err) goto exit;
  46.744 +    err = child_int(exp, ovnet, &vnet);
  46.745 +    if(err) goto exit;
  46.746 +    err = child_mac(exp, ovmac, vmac.mac);
  46.747 +    dprintf("> connect vif=%s vnet=%d\n", vif, vnet);
  46.748 +    err = ctrl_vif_conn(vif, vnet, &vmac);
  46.749 + exit:
  46.750 +    dprintf("< err=%d\n", err);
  46.751 +    return err;
  46.752 +}
  46.753 +
  46.754 +/** (vif.del (vnet <vnet>) (vmac <macaddr>))
  46.755 + */
  46.756 +static int eval_vif_del(Sxpr exp){
  46.757 +    int err = 0;
  46.758 +    Sxpr ovnet = intern("vnet");
  46.759 +    Sxpr ovmac = intern("vmac");
  46.760 +    int vnet;
  46.761 +    Vmac vmac = {};
  46.762 +
  46.763 +    err = child_int(exp, ovnet, &vnet);
  46.764 +    if(err) goto exit;
  46.765 +    err = child_mac(exp, ovmac, vmac.mac);
  46.766 +    if(err) goto exit;
  46.767 +    err = ctrl_vif_del(vnet, &vmac);
  46.768 +  exit:
  46.769 +    return err;
  46.770 +}
  46.771 +
  46.772 +typedef struct SxprEval {
  46.773 +    Sxpr elt;
  46.774 +    int (*fn)(Sxpr);
  46.775 +} SxprEval;
  46.776 +
  46.777 +static int eval(Sxpr exp){
  46.778 +    int err = 0;
  46.779 +    SxprEval defs[] = {
  46.780 +        { intern("varp.print"),   eval_varp_print   },
  46.781 +        { intern("varp.mcaddr"),  eval_varp_mcaddr  },
  46.782 +        { intern("vif.add"),      eval_vif_add      },
  46.783 +        { intern("vif.conn"),     eval_vif_conn     },
  46.784 +        { intern("vif.del"),      eval_vif_del      },
  46.785 +        { intern("vnet.add"),     eval_vnet_add     },
  46.786 +        { intern("vnet.del"),     eval_vnet_del     },
  46.787 +        { ONONE, N