direct-io.hg
changeset 3079:0dfcf477fdd3
bitkeeper revision 1.1159.183.3 (41a2188bAFjxwwkR-Q5G7XglkWtKfQ)
Add vnet files.
Add vnet files.
line diff
1.1 --- a/.rootkeys Mon Nov 22 16:41:50 2004 +0000 1.2 +++ b/.rootkeys Mon Nov 22 16:49:15 2004 +0000 1.3 @@ -543,6 +543,64 @@ 4104ffca-jPHLVOrW0n0VghEXXtKxg tools/sv/ 1.4 40fcefb3yMSrZvApO9ToIi-iQwnchA tools/sv/images/xen.png 1.5 41013a83z27rKvWIxAfUBMVZ1eDCDg tools/sv/inc/script.js 1.6 40fcefb3zGC9XNBkSwTEobCoq8YClA tools/sv/inc/style.css 1.7 +41a21888_WlknVWjSxb32Fo13_ujsw tools/vnet/00README 1.8 +41a21888bOiOJc7blzRbe4MNJoaYTw tools/vnet/Makefile 1.9 +41a21888mg2k5HeiVjlQYEtJBZT4Qg tools/vnet/doc/vnet-module.txt 1.10 +41a21888cuxfT8wjCdRR6V1lqf5NtA tools/vnet/doc/vnet-xend.txt 1.11 +41a21888xEQJAIGktS6XQ4xz2TyA5g tools/vnet/examples/Makefile 1.12 +41a21888FGQhPR5LJ1GRtOSIIN3QEw tools/vnet/examples/network-vnet 1.13 +41a21888QPgKrulCfR9SY_pxZKU0KA tools/vnet/examples/vnet97.sxp 1.14 +41a21888Gm0UBs1i7HqveT7Yz0u8DQ tools/vnet/examples/vnet98.sxp 1.15 +41a21888r4oGPuGv2Lxl-thgV3H54w tools/vnet/examples/vnet99.sxp 1.16 +41a21888c9TCRlUwJS9WBjB3e9aWgg tools/vnet/vnet-module/00README 1.17 +41a21888K2ItolEkksc1MUqyTDI_Kg tools/vnet/vnet-module/Makefile 1.18 +41a21888mJsFJD7bVMm-nrnWnalGBw tools/vnet/vnet-module/Makefile-2.4 1.19 +41a21888Znze3-UCCBZ-Nxpj-bNeHA tools/vnet/vnet-module/Makefile-2.6 1.20 +41a21889fwc1judJ7DYvyEviSJ3TPg tools/vnet/vnet-module/Makefile.ver 1.21 +41a21889m_sYkdODF3j5uhMP-Guy9Q tools/vnet/vnet-module/Makefile.vnet 1.22 +41a21889bXW2lC28U6KS_s5tOJ_W9Q tools/vnet/vnet-module/esp.c 1.23 +41a21889L2MfLDsUFQxstt-0frIVmw tools/vnet/vnet-module/esp.h 1.24 +41a21889V1jOsB2JExI-XQl720WHwg tools/vnet/vnet-module/etherip.c 1.25 +41a21889IpMYbNufHMDXe2ndNw4JxA tools/vnet/vnet-module/etherip.h 1.26 +41a21889LT9TNqO2EvTFIUTujrkX9w tools/vnet/vnet-module/if_etherip.h 1.27 +41a21889PESythGZFG6kmSoOkkN2Nw tools/vnet/vnet-module/if_varp.h 1.28 +41a21889nCPEomHqOyQ4vnhEm4II4g tools/vnet/vnet-module/linux/pfkeyv2.h 1.29 +41a21889A_fw4pRmCbBfZdtRunM5Eg tools/vnet/vnet-module/random.c 1.30 +41a218899Xy2dPKSu3pkuqaqkfKMTA tools/vnet/vnet-module/random.h 1.31 
+41a21889rIH5S1dv8ygdSsTGNlg0JA tools/vnet/vnet-module/sa.c 1.32 +41a218896Z4vxy6gnV9h0fWRWu0lKQ tools/vnet/vnet-module/sa.h 1.33 +41a21889qFD8BTbDpB55uVmSVDEsgw tools/vnet/vnet-module/sa_algorithm.c 1.34 +41a21889r2AwTe-OCSSVMxBzz8uDtw tools/vnet/vnet-module/sa_algorithm.h 1.35 +41a21889tvjtL7O8tMveVB8MdSKPnQ tools/vnet/vnet-module/skb_context.c 1.36 +41a21889lD_QOUz2Msd7fB5rJQzfxA tools/vnet/vnet-module/skb_context.h 1.37 +41a21889F1r1xnJamzdeuClR8MNwQg tools/vnet/vnet-module/skb_util.c 1.38 +41a21889sS4bjVqEna24sS8NpV7SRA tools/vnet/vnet-module/skb_util.h 1.39 +41a21889MDawEK3J_f_oAGnZznhG2w tools/vnet/vnet-module/tunnel.c 1.40 +41a218896TlHXpVVqF50uz_u_WMXRw tools/vnet/vnet-module/tunnel.h 1.41 +41a21889nQYbJbqrOApg_RbkwPtXGg tools/vnet/vnet-module/varp.c 1.42 +41a21889Pev5MJlqqass6CxN4mmvPw tools/vnet/vnet-module/varp.h 1.43 +41a21889GbsHHfkpA-PkOvltfEwpMA tools/vnet/vnet-module/varp_socket.c 1.44 +41a21889sknn8zd5xCJlpQbs7MvxKg tools/vnet/vnet-module/vif.c 1.45 +41a21889VsKKWpe6rcXOSLPy2FuNWQ tools/vnet/vnet-module/vif.h 1.46 +41a21889dgkOyuSTVqy7D8TPIzrUyw tools/vnet/vnet-module/vnet.c 1.47 +41a21889ocAdwk7V1nNt4iBpmYW-Mw tools/vnet/vnet-module/vnet.h 1.48 +41a21889YrTiC0ArJSGFtiaHz2j1qQ tools/vnet/vnet-module/vnet_dev.c 1.49 +41a21889rHT4vrC4VAfk7-xP_K5aBg tools/vnet/vnet-module/vnet_dev.h 1.50 +41a21889qJj6GjT2f5hMHRvPS1AW4w tools/vnet/vnet-module/vnet_ioctl.c 1.51 +41a2188a8W4xYB0LYm512agtoEv52g tools/vnet/vnet-module/vnet_ioctl.h 1.52 +41a2188aFF_1T9OgpqUjjjaCqKB8lw tools/vnet/vnetd/Makefile 1.53 +41a2188a9j84qS4CxqMLVCvyGpA93w tools/vnet/vnetd/connection.c 1.54 +41a2188atexNEami9TNVYNkRSb7Bqg tools/vnet/vnetd/connection.h 1.55 +41a2188abgYpITSrWoMGHHrM56nklw tools/vnet/vnetd/marshal.c 1.56 +41a2188aUbOi5tAYwOS4aPixo1EGwQ tools/vnet/vnetd/marshal.h 1.57 +41a2188aDJlSVB1s_st2MSWxW8kMwg tools/vnet/vnetd/select.c 1.58 +41a2188aE9LUDdSSwNT3BWVWCvGSnQ tools/vnet/vnetd/select.h 1.59 +41a2188aTbMKv_Eig12dSrBUEBl1Jg tools/vnet/vnetd/timer.c 1.60 
+41a2188aIzBGqQ6DUVzCxfBsN0Q6Ww tools/vnet/vnetd/timer.h 1.61 +41a2188aIf3Xk6uvk7KzjdpOsflAEw tools/vnet/vnetd/vcache.c 1.62 +41a2188ar6_vOO3_tEJQjmFVU3409A tools/vnet/vnetd/vcache.h 1.63 +41a2188aETrGU60X9WtGhYVfU7z0Pw tools/vnet/vnetd/vnetd.c 1.64 +41a2188ahYjemudGyB7078AWMFR-0w tools/vnet/vnetd/vnetd.h 1.65 4194e861IgTabTt8HOuh143QIJFD1Q tools/x2d2/Makefile 1.66 4194e861M2gcBz4i94cQYpqzi8n6UA tools/x2d2/cntrl_con.c 1.67 4194e8612TrrMvC8ZlA4h2ZYCPWz4g tools/x2d2/minixend.c
2.1 --- a/BitKeeper/etc/ignore Mon Nov 22 16:41:50 2004 +0000 2.2 +++ b/BitKeeper/etc/ignore Mon Nov 22 16:49:15 2004 +0000 2.3 @@ -59,6 +59,13 @@ tools/check/.* 2.4 tools/libxc/xen/* 2.5 tools/misc/miniterm/miniterm 2.6 tools/misc/xen_cpuperf 2.7 +tools/vnet/gc 2.8 +tools/vnet/gc*/* 2.9 +tools/vnet/vnet-module/.tmp_versions/* 2.10 +tools/vnet/vnet-module/.*.cmd 2.11 +tools/vnet/vnet-module/*.ko 2.12 +tools/vnet/vnet-module/vnet_module.mod.* 2.13 +tools/vnet/vnetd/vnetd 2.14 tools/web-shutdown.tap 2.15 tools/xentrace/xentrace 2.16 tools/xfrd/xfrd
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/tools/vnet/00README Mon Nov 22 16:49:15 2004 +0000 3.3 @@ -0,0 +1,10 @@ 3.4 +This directory contains the implementation of vnets: 3.5 +virtual private networks for virtual machines. 3.6 +See doc/ for more information and examples/ for example 3.7 +configurations. 3.8 + 3.9 +The kernel module is in vnet-module/ and the vnet forwarding 3.10 +daemon is in vnetd/. The vnetd daemon makes vnets work across 3.11 +subnets when multicast routing is not available. 3.12 + 3.13 +Mike Wray <mike.wray@hp.com> 3.14 \ No newline at end of file
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/tools/vnet/Makefile Mon Nov 22 16:49:15 2004 +0000 4.3 @@ -0,0 +1,42 @@ 4.4 +# Top-level vnet makefile: builds vnetd (which depends on gc-install) and the vnet kernel module. 4.5 +export LINUX_RELEASE ?=2.6 4.6 + 4.7 +all: compile 4.8 + 4.9 +compile: vnetd vnet-module 4.10 + 4.11 +gc.tar.gz: 4.12 + wget http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/$@ 4.13 + 4.14 +gc: gc.tar.gz 4.15 + tar xfz gc.tar.gz 4.16 + ln -sf gc?.? gc 4.17 + 4.18 +gc-install: gc 4.19 + (cd gc && ./configure --prefix=`pwd`/install && make && make install) 4.20 + 4.21 +gc-clean: 4.22 + -$(MAKE) -C gc clean 4.23 + 4.24 +gc-pristine: 4.25 + -rm -rf gc?.? gc 4.26 +# Command targets; vnetd and vnet-module are also directory names, so they must be phony. 4.27 +.PHONY: all compile gc-install gc-clean gc-pristine vnetd vnet-module install dist clean 4.28 + 4.29 +vnetd: gc-install 4.30 + $(MAKE) -C vnetd 4.31 + 4.32 +vnet-module: 4.33 + $(MAKE) -C vnet-module 4.34 + 4.35 +install: compile 4.36 + $(MAKE) -C vnetd install 4.37 + $(MAKE) -C vnet-module install 4.38 + $(MAKE) -C examples install 4.39 + 4.40 +dist: $(TARGET) 4.41 + $(MAKE) prefix=`pwd`/../../install dist=yes install 4.42 + 4.43 +clean: 4.44 + -$(MAKE) -C vnetd clean 4.45 + -$(MAKE) -C vnet-module clean
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/tools/vnet/doc/vnet-module.txt Mon Nov 22 16:49:15 2004 +0000 5.3 @@ -0,0 +1,50 @@ 5.4 +Vnet Module Command Interface 5.5 +Mike Wray <mike.wray@hp.com> 5.6 +2004/09/17 5.7 + 5.8 +When loaded with insmod, the vnet module creates /proc/vnet/policy, which 5.9 +can be used to control the module by writing commands into it. 5.10 +The return code from the command should be returned by close. 5.11 + 5.12 +The commands are: 5.13 + 5.14 +(vnet.add (id <id>) [(security { none | auth | conf } )] ) 5.15 + 5.16 +Create the vnet with id <id> and the given security level (default none). 5.17 +Security levels: 5.18 +- none: no security 5.19 +- auth: message authentication (IPSEC hmac) 5.20 +- conf: message confidentiality (IPSEC hmac and encryption) 5.21 + 5.22 +(vnet.del (id <id>)) 5.23 + 5.24 +Delete the vnet with id <id>. 5.25 + 5.26 +(vif.add (vnet <vnetid>) (vmac <macaddr>)) 5.27 + 5.28 +Add the vif with MAC address <macaddr> to the vnet with id <vnetid>. 5.29 +This makes the vnet module respond to VARP requests for <macaddr> 5.30 +on vnet <vnetid>. 5.31 + 5.32 +(vif.del (vnet <vnetid>) (vmac <macaddr>)) 5.33 + 5.34 +Remove the vif with MAC address <macaddr> from the vnet with id <vnetid>. 5.35 +The vnet module will stop responding to VARP for the vif. 5.36 + 5.37 +Examples: 5.38 + 5.39 +To create vnet 10 with no security: 5.40 + 5.41 +echo '(vnet.add (id 10))' > /proc/vnet/policy 5.42 + 5.43 +To create vnet 11 with message authentication: 5.44 + 5.45 +echo '(vnet.add (id 11) (security auth))' > /proc/vnet/policy 5.46 + 5.47 +To add the vif with vmac "aa:00:00:bc:34:ae" to vnet 10: 5.48 + 5.49 +echo '(vif.add (vnet 10) (vmac aa:00:00:bc:34:ae))' > /proc/vnet/policy 5.50 + 5.51 +To remove the vif from the vnet: 5.52 + 5.53 +echo '(vif.del (vnet 10) (vmac aa:00:00:bc:34:ae))' > /proc/vnet/policy
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/tools/vnet/doc/vnet-xend.txt Mon Nov 22 16:49:15 2004 +0000 6.3 @@ -0,0 +1,140 @@ 6.4 + 6.5 +Vnets: Virtual Networks for Virtual Machines 6.6 + 6.7 +Mike Wray <mike.wray@hp.com> 6.8 + 6.9 +0) Introduction 6.10 +--------------- 6.11 + 6.12 +Vnets provide virtual private LANs for virtual machines. 6.13 +This is done using bridging and tunneling. A virtual interface 6.14 +on a vnet can only see other interfaces on the same vnet - it cannot 6.15 +see the real network, and the real network cannot see it either. 6.16 + 6.17 +Virtual interfaces on the same vnet can be on the same machine 6.18 +or on different machines; either way they can still talk. The hosting machines 6.19 +can even be on different subnets if you run vnetd to forward, 6.20 +or have multicast routing enabled. 6.21 + 6.22 + 6.23 +1) Installing vnet support 6.24 +-------------------------- 6.25 + 6.26 +Assuming the code has been installed (make install in the parent directory), 6.27 +configure xend to use 'network-vnet' instead of the default 'network' to 6.28 +start up networking. This just loads the vnet module when networking starts. 6.29 + 6.30 +In /etc/xen/xend-config.sxp: 6.31 + 6.32 +Configure the network script: 6.33 + 6.34 +(network-script network-vnet) 6.35 + 6.36 +Restart xend. 6.37 + 6.38 +2) Creating vnets 6.39 +----------------- 6.40 + 6.41 +Xend already implements commands to add/remove vnets and 6.42 +bridge to them. To add a vnet use 6.43 + 6.44 +xm call vnet_add <vnet config file> 6.45 + 6.46 +For example, if vnet97.sxp contains: 6.47 + 6.48 +(vnet (id 97) (bridge vnet97) (vnetif vnetif97) (security none)) 6.49 + 6.50 +do 6.51 + 6.52 +xm call vnet_add vnet97.sxp 6.53 + 6.54 +This will define a vnet with id 97 and no security. The bridge for the 6.55 +vnet is called vnet97 and the virtual interface for it is vnetif97. 6.56 +To add an interface on a vm to this vnet simply set its bridge to vnet97 6.57 +in its configuration. 
6.58 + 6.59 +In Python: 6.60 + 6.61 +vif="bridge=vnet97" 6.62 + 6.63 +In sxp: 6.64 + 6.65 +(dev (vif (mac aa:00:00:01:02:03) (bridge vnet97))) 6.66 + 6.67 +Once configured, vnets are persistent in the xend database. 6.68 +To remove a vnet use 6.69 + 6.70 +xm call vnet_delete <vnet id> 6.71 + 6.72 +To list vnets use 6.73 + 6.74 +xm call vnets 6.75 + 6.76 +To get information on a vnet id use 6.77 + 6.78 +xm call vnet <vnet id> 6.79 + 6.80 +3) Troubleshooting 6.81 +------------------ 6.82 + 6.83 +The vnet module should appear in 'lsmod'. 6.84 +If a vnet has been configured it should appear in the output of 'xm call vnets'. 6.85 +Its bridge and interface should appear in 'ifconfig'. 6.86 +It should also show in 'brctl show', with its attached interfaces. 6.87 + 6.88 +You can 'see into' a vnet from dom0 if you put an IP address on the bridge. 6.89 +For example, if you have vnet97 with a vm with ip addr 10.0.0.12 on it, 6.90 +then 6.91 + 6.92 +ifconfig vnet97 10.0.0.20 up 6.93 + 6.94 +should let you ping 10.0.0.12 via the vnet97 bridge. 
6.95 + 6.96 +4) Examples 6.97 +----------- 6.98 + 6.99 +Here's the full config for a vm on vnet 97, using ip addr 10.0.0.12: 6.100 + 6.101 +(vm 6.102 + (name dom12) 6.103 + (memory '64') 6.104 + (cpu '1') 6.105 + (console '8502') 6.106 + (image 6.107 + (linux 6.108 + (kernel /boot/vmlinuz-2.6.9-xenU) 6.109 + (ip 10.0.0.12:1.2.3.4::::eth0:off) 6.110 + (root /dev/hda1) 6.111 + (args 'rw fastboot 4') 6.112 + ) 6.113 + ) 6.114 + (device (vbd (uname phy:hda2) (dev hda1) (mode w))) 6.115 + (device (vif (mac aa:00:00:11:00:12) (bridge vnet97))) 6.116 +) 6.117 + 6.118 +If you run another vm on the same vnet: 6.119 + 6.120 +(vm 6.121 + (name dom11) 6.122 + (memory '64') 6.123 + (cpu '1') 6.124 + (console '8501') 6.125 + (image 6.126 + (linux 6.127 + (kernel /boot/vmlinuz-2.6.9-xenU) 6.128 + (ip 10.0.0.11:1.2.3.4::::eth0:off) 6.129 + (root /dev/hda1) 6.130 + (args 'rw fastboot 4') 6.131 + ) 6.132 + ) 6.133 + (device (vbd (uname phy:hda3) (dev hda1) (mode w))) 6.134 + (device (vif (mac aa:00:00:11:00:11) (bridge vnet97))) 6.135 +) 6.136 + 6.137 +the vms should be able to talk over the vnet. Check with ping. 6.138 +If they are both on the same machine the connection will simply 6.139 +be the vnet97 bridge, if they are on separate machines their 6.140 +packets will be tunneled in etherip. They should be able to 6.141 +see each other, but not the real network. 6.142 + 6.143 +
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/tools/vnet/examples/Makefile Mon Nov 22 16:49:15 2004 +0000 7.3 @@ -0,0 +1,12 @@ 7.4 +# -*- mode: Makefile; -*- 7.5 +#============================================================================ 7.6 + 7.7 +XEN_SCRIPT_DIR:=/etc/xen/scripts 7.8 + 7.9 +all: 7.10 + 7.11 +install: 7.12 + install -m 0755 -d $(prefix)$(XEN_SCRIPT_DIR) 7.13 + install -m 0554 network-vnet $(prefix)$(XEN_SCRIPT_DIR) 7.14 + 7.15 +clean: 7.16 \ No newline at end of file
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/tools/vnet/examples/network-vnet Mon Nov 22 16:49:15 2004 +0000 8.3 @@ -0,0 +1,218 @@ 8.4 +#!/bin/sh 8.5 +#============================================================================ 8.6 +# Xen network start/stop script with vnet support. 8.7 +# Xend calls a network script when it starts. 8.8 +# The script name to use is defined in /etc/xen/xend-config.sxp 8.9 +# in the network-script field. 8.10 +# 8.11 +# This script creates a bridge (default xen-br0), adds a device 8.12 +# (default eth0) to it, copies the IP addresses from the device 8.13 +# to the bridge and adjusts the routes accordingly. 8.14 +# 8.15 +# If all goes well, this should ensure that networking stays up. 8.16 +# However, some configurations are upset by this, especially 8.17 +# NFS roots. If the bridged setup does not meet your needs, 8.18 +# configure a different script, for example using routing instead. 8.19 +# 8.20 +# Usage: 8.21 +# 8.22 +# network-vnet (start|stop|status) {VAR=VAL}* 8.23 +# 8.24 +# Vars: 8.25 +# 8.26 +# bridge The bridge to use (default xen-br0). 8.27 +# netdev The interface to add to the bridge (default eth0). 8.28 +# antispoof Whether to use iptables to prevent spoofing (default yes). 8.29 +# 8.30 +# start: 8.31 +# Creates the bridge and enslaves netdev to it. 8.32 +# Copies the IP addresses from netdev to the bridge. 8.33 +# Deletes the routes to netdev and adds them on bridge, then loads the vnet module if it is installed. 8.34 +# 8.35 +# stop: 8.36 +# Removes netdev from the bridge. 8.37 +# Deletes the routes to bridge and adds them to netdev. 8.38 +# 8.39 +# status: 8.40 +# Print ifconfig for netdev and bridge. 8.41 +# Print routes. 8.42 +# 8.43 +#============================================================================ 8.44 + 8.45 +# Exit if anything goes wrong. 8.46 +set -e 8.47 + 8.48 +# First arg is the operation (may be missing; the case at the end then prints the usage). 
8.49 +OP=${1:-} 8.50 +if [ $# -gt 0 ] ; then shift ; fi 8.51 + 8.52 +# Pull variables in args in to environment. 
8.53 +for arg ; do export "${arg}" ; done 8.54 + 8.55 +bridge=${bridge:-xen-br0} 8.56 +netdev=${netdev:-eth0} 8.57 +antispoof=${antispoof:-yes} 8.58 + 8.59 +echo "network $OP bridge=$bridge netdev=$netdev antispoof=$antispoof" 8.60 + 8.61 +# Usage: transfer_addrs src dst 8.62 +# Copy all IP addresses (including aliases) from device $src to device $dst. 8.63 +transfer_addrs () { 8.64 + local src=$1 8.65 + local dst=$2 8.66 + # Don't bother if $dst already has IP addresses. 8.67 + if ip addr show dev ${dst} | egrep -q '^ *inet' ; then 8.68 + return 8.69 + fi 8.70 + # Address lines start with 'inet' and have the device in them. 8.71 + # Replace 'inet' with 'ip addr add' and change the device name $src 8.72 + # to 'dev $dst'. Remove netmask as we'll add routes later. 8.73 + ip addr show dev ${src} | egrep '^ *inet' | sed -e " 8.74 +s/inet/ip addr add/ 8.75 +s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\)/[0-9]\+@\1@ 8.76 +s/${src}/dev ${dst}/ 8.77 +" | sh -e 8.78 +} 8.79 + 8.80 +# Usage: transfer_routes src dst 8.81 +# Get all IP routes to device $src, delete them, and 8.82 +# add the same routes to device $dst. 8.83 +# The original routes have to be deleted, otherwise adding them 8.84 +# for $dst fails (duplicate routes). 8.85 +transfer_routes () { 8.86 + local src=$1 8.87 + local dst=$2 8.88 + # List all routes and grep the ones with $src in. 8.89 + # Stick 'ip route del' on the front to delete. 8.90 + # Change $src to $dst and use 'ip route add' to add. 8.91 + ip route list | grep ${src} | sed -e " 8.92 +h 8.93 +s/^/ip route del / 8.94 +P 8.95 +g 8.96 +s/${src}/${dst}/ 8.97 +s/^/ip route add / 8.98 +P 8.99 +d 8.100 +" | sh -e 8.101 +} 8.102 + 8.103 +# Usage: create_bridge dev bridge 8.104 +# Create bridge $bridge if it does not exist and bring it up ($dev is not used here; op_start adds it). 8.105 +create_bridge () { 8.106 + local dev=$1 8.107 + local bridge=$2 8.108 + 8.109 + # Don't create the bridge if it already exists. 8.110 + if ! 
brctl show | grep -q ${bridge} ; then 8.111 + brctl addbr ${bridge} 8.112 + brctl stp ${bridge} off 8.113 + brctl setfd ${bridge} 0 8.114 + fi 8.115 + ifconfig ${bridge} up 8.116 +} 8.117 + 8.118 +# Usage: antispoofing dev bridge 8.119 +# Set the default FORWARD policy to drop. 8.120 +# Accept forwarded packets that arrive on $dev. 8.121 +antispoofing () { 8.122 + local dev=$1 8.123 + local bridge=$2 8.124 + 8.125 + iptables -P FORWARD DROP 8.126 + iptables -A FORWARD -m physdev --physdev-in ${dev} -j ACCEPT 8.127 +} 8.128 + 8.129 +# Usage: show_status dev bridge 8.130 +# Print ifconfig and routes. 8.131 +show_status () { 8.132 + local dev=$1 8.133 + local bridge=$2 8.134 + 8.135 + echo '============================================================' 8.136 + ifconfig ${dev} 8.137 + ifconfig ${bridge} 8.138 + echo ' ' 8.139 + ip route list 8.140 + echo ' ' 8.141 + route -n 8.142 + echo '============================================================' 8.143 +} 8.144 + 8.145 +# Insert the vnet module if it can be found and 8.146 +# it's not already there. 8.147 +vnet_insert () { 8.148 + local module="vnet_module" 8.149 + local mod_dir=/lib/modules/$(uname -r)/kernel 8.150 + local mod_path="${mod_dir}/${module}" 8.151 + local mod_obj="" 8.152 + 8.153 + for ext in ".o" ".ko" ; do 8.154 + f=${mod_path}${ext} 8.155 + if [ -f "${f}" ] ; then 8.156 + mod_obj=$f 8.157 + break 8.158 + fi 8.159 + done 8.160 + if [ -z "${mod_obj}" ] ; then 8.161 + return 8.162 + fi 8.163 + if lsmod | grep -q ${module} ; then 8.164 + echo "VNET: ${module} loaded" 8.165 + else 8.166 + echo "VNET: Loading ${module}..." 8.167 + insmod ${mod_obj} 8.168 + fi 8.169 +} 8.170 + 8.171 +op_start () { 8.172 + if [ "${bridge}" = "null" ] ; then 8.173 + return 8.174 + fi 8.175 + # Create the bridge and give it the interface IP addresses. 8.176 + # Move the interface routes onto the bridge. 
8.177 + create_bridge ${netdev} ${bridge} 8.178 + transfer_addrs ${netdev} ${bridge} 8.179 + transfer_routes ${netdev} ${bridge} 8.180 + # Don't add $netdev to $bridge if it's already on a bridge. 8.181 + if ! brctl show | grep -q ${netdev} ; then 8.182 + brctl addif ${bridge} ${netdev} 8.183 + fi 8.184 + 8.185 + if [ "${antispoof}" = 'yes' ] ; then 8.186 + antispoofing ${netdev} ${bridge} 8.187 + fi 8.188 + 8.189 + vnet_insert 8.190 +} 8.191 + 8.192 +op_stop () { 8.193 + if [ "${bridge}" = "null" ] ; then 8.194 + return 8.195 + fi 8.196 + # Remove the interface from the bridge. 8.197 + # Move the routes back to the interface. 8.198 + brctl delif ${bridge} ${netdev} 8.199 + transfer_routes ${bridge} ${netdev} 8.200 + 8.201 + # It's not our place to be enabling forwarding... 8.202 +} 8.203 + 8.204 +case ${OP} in 8.205 + start) 8.206 + op_start 8.207 + ;; 8.208 + 8.209 + stop) 8.210 + op_stop 8.211 + ;; 8.212 + 8.213 + status) 8.214 + show_status ${netdev} ${bridge} 8.215 + ;; 8.216 + 8.217 + *) 8.218 + echo 'Unknown command: ' ${OP} 8.219 + echo 'Valid commands are: start, stop, status' 8.220 + exit 1 8.221 +esac
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/tools/vnet/examples/vnet97.sxp Mon Nov 22 16:49:15 2004 +0000 9.3 @@ -0,0 +1,3 @@ 9.4 +# Vnet configuration for a vnet with id 97 and no security. 9.5 +# Configure using 'xm call vnet_add vnet97.sxp'. 9.6 +(vnet (id 97) (bridge vnet97) (vnetif vnetif97) (security none))
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/tools/vnet/examples/vnet98.sxp Mon Nov 22 16:49:15 2004 +0000 10.3 @@ -0,0 +1,3 @@ 10.4 +# Vnet configuration for a vnet with id 98 and message authentication. 10.5 +# Configure using 'xm call vnet_add vnet98.sxp'. 10.6 +(vnet (id 98) (bridge vnet98) (vnetif vnetif98) (security auth))
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 11.2 +++ b/tools/vnet/examples/vnet99.sxp Mon Nov 22 16:49:15 2004 +0000 11.3 @@ -0,0 +1,3 @@ 11.4 +# Vnet configuration for a vnet with id 99 and message confidentiality. 11.5 +# Configure using 'xm call vnet_add vnet99.sxp'. 11.6 +(vnet (id 99) (bridge vnet99) (vnetif vnetif99) (security conf))
12.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 12.2 +++ b/tools/vnet/vnet-module/00README Mon Nov 22 16:49:15 2004 +0000 12.3 @@ -0,0 +1,41 @@ 12.4 +Vnet module for network virtualization. 12.5 +Mike Wray <mike.wray@hp.com> 12.6 + 12.7 +*) Compiling 12.8 +The vnet module can be compiled for 2.4 or 2.6 series kernels. 12.9 +The makefiles use the following variables, which 12.10 +can be set in your env or on the make command line: 12.11 + 12.12 +LINUX_RELEASE: linux release to compile for, 2.4 or 2.6 (default). 12.13 +XEN_ROOT: root of the xen tree containing kernel source. Default '../../..'. 12.14 +prefix: root path to install in, default is XEN_ROOT/install/. 12.15 + Set to '/' to install relative to filesystem root. 12.16 +KERNEL_VERSION: kernel version, default is the linux version found under XEN_ROOT plus KERNEL_MINOR. 12.17 +KERNEL_MINOR: kernel minor version, default -xen0. 12.18 +KERNEL_SRC: path to kernel source, default linux-<KERNEL_VERSION> under XEN_ROOT. 12.19 + 12.20 +*) For 2.4 kernel 12.21 + 12.22 +To compile from scratch: 12.23 + 12.24 +make LINUX_RELEASE=2.4 clean 12.25 +make LINUX_RELEASE=2.4 12.26 + 12.27 +This will build vnet_module.o in the current directory. 12.28 +To install the module use 12.29 + 12.30 +make LINUX_RELEASE=2.4 install 12.31 + 12.32 +*) For 2.6 kernel 12.33 + 12.34 +To compile from scratch: 12.35 + 12.36 +make clean 12.37 +make LINUX_RELEASE=2.6 12.38 + 12.39 +This will build vnet_module.ko in the current directory. 12.40 +To install the module use 12.41 + 12.42 +make LINUX_RELEASE=2.6 install 12.43 + 12.44 +
13.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 13.2 +++ b/tools/vnet/vnet-module/Makefile Mon Nov 22 16:49:15 2004 +0000 13.3 @@ -0,0 +1,67 @@ 13.4 +# -*- mode: Makefile; -*- 13.5 +#============================================================================ 13.6 +# 13.7 +# Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 13.8 +# 13.9 +# This program is free software; you can redistribute it and/or modify 13.10 +# it under the terms of the GNU General Public License as published by the 13.11 +# Free Software Foundation; either version 2 of the License, or (at your 13.12 +# option) any later version. 13.13 +# 13.14 +# This program is distributed in the hope that it will be useful, but 13.15 +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13.16 +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13.17 +# for more details. 13.18 +# 13.19 +# You should have received a copy of the GNU General Public License along 13.20 +# with this program; if not, write to the Free software Foundation, Inc., 13.21 +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 13.22 +#============================================================================ 13.23 + 13.24 +#============================================================================ 13.25 +ifeq ($(src),) 13.26 +LINUX_RELEASE ?=2.6 13.27 + 13.28 +include Makefile-$(LINUX_RELEASE) 13.29 + 13.30 +#============================================================================ 13.31 +else 13.32 +#============================================================================ 13.33 +# This section is for the 2.6 kbuild. 
13.34 + 13.35 +#$(warning KBUILD_EXTMOD $(KBUILD_EXTMOD)) 13.36 +#$(warning src $(src)) 13.37 +#$(warning obj $(obj)) 13.38 + 13.39 +include $(src)/Makefile.vnet 13.40 + 13.41 +obj-m = vnet_module.o 13.42 +vnet_module-objs = $(VNET_OBJ) 13.43 +vnet_module-objs += $(VNET_LIB_OBJ) 13.44 + 13.45 +#---------------------------------------------------------------------------- 13.46 +# The fancy stuff in the kernel build defeats 'vpath %.c' so we can't 13.47 +# use that to get the lib files compiled. 13.48 +# Setup explicit rules for them using the kbuild C compile rule. 13.49 + 13.50 +# File names in the lib dir. 13.51 +remote_srcs = $(foreach file,$(VNET_LIB_SRC),$(LIB_DIR)/$(file)) 13.52 + 13.53 +# Equivalent file names here. 13.54 +local_srcs = $(foreach file,$(VNET_LIB_SRC),$(src)/$(file)) 13.55 + 13.56 +# Objects for the local names. 13.57 +local_objs = $(local_srcs:.c=.o) 13.58 + 13.59 +# Make the local objects depend on compiling the remote sources. 13.60 +$(local_objs): $(src)/%.o: $(LIB_DIR)/%.c 13.61 + $(call if_changed_rule,cc_o_c) 13.62 +#---------------------------------------------------------------------------- 13.63 + 13.64 +vpath %.h $(LIB_DIR) 13.65 +EXTRA_CFLAGS += -I $(LIB_DIR) 13.66 +EXTRA_CFLAGS += -I $(src) 13.67 + 13.68 +endif 13.69 +#============================================================================ 13.70 +
14.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 14.2 +++ b/tools/vnet/vnet-module/Makefile-2.4 Mon Nov 22 16:49:15 2004 +0000 14.3 @@ -0,0 +1,97 @@ 14.4 +# -*- mode: Makefile; -*- 14.5 +#============================================================================ 14.6 +# 14.7 +# Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 14.8 +# 14.9 +# This program is free software; you can redistribute it and/or modify 14.10 +# it under the terms of the GNU General Public License as published by the 14.11 +# Free Software Foundation; either version 2 of the License, or (at your 14.12 +# option) any later version. 14.13 +# 14.14 +# This program is distributed in the hope that it will be useful, but 14.15 +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14.16 +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14.17 +# for more details. 14.18 +# 14.19 +# You should have received a copy of the GNU General Public License along 14.20 +# with this program; if not, write to the Free software Foundation, Inc., 14.21 +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 14.22 +#============================================================================ 14.23 + 14.24 +#============================================================================ 14.25 +# Vnet module makefile for 2.4 series kernels. 14.26 + 14.27 +include Makefile.ver 14.28 + 14.29 +KERNEL_MODULE := vnet_module.o 14.30 +# 1 if the kernel config has CONFIG_MODVERSIONS=y, else 0. grep must be quiet (-q): $(shell) captures all output, so a printed match would make the value differ from '1'. 14.31 +CONFIG_MODVERSIONS := $(shell grep -q 'CONFIG_MODVERSIONS=y' $(KERNEL_SRC)/.config && echo 1 || echo 0) 14.32 + 14.33 +include Makefile.vnet 14.34 + 14.35 +VNET_OBJ += $(VNET_LIB_OBJ) 14.36 + 14.37 +#---------------------------------------------------------------------------- 14.38 + 14.39 +vpath %.h $(KERNEL_SRC)/include 14.40 +INCLUDES+= -I $(KERNEL_SRC)/include 14.41 + 14.42 +vpath %.h $(LIB_DIR) 14.43 +vpath %.c $(LIB_DIR) 14.44 +INCLUDES += -I $(LIB_DIR) 14.45 + 14.46 +INCLUDES+= -I . 
14.47 + 14.48 +#---------------------------------------------------------------------------- 14.49 + 14.50 +CPPFLAGS += -D__KERNEL__ 14.51 +CPPFLAGS += -DMODULE 14.52 + 14.53 +ifeq ($(CONFIG_MODVERSIONS), 1) 14.54 +CPPFLAGS += -DMODVERSIONS 14.55 +CPPFLAGS += -include $(KERNEL_SRC)/include/linux/modversions.h 14.56 +endif 14.57 + 14.58 +CPPFLAGS += $(INCLUDES) 14.59 + 14.60 +CFLAGS += -Wall 14.61 +CFLAGS += -Wstrict-prototypes 14.62 +CFLAGS += -Wno-trigraphs 14.63 +CFLAGS += -Wno-unused-function 14.64 +CFLAGS += -Wno-unused-parameter 14.65 + 14.66 +CFLAGS += -g 14.67 +CFLAGS += -O2 14.68 +CFLAGS += -fno-strict-aliasing 14.69 +CFLAGS += -fno-common 14.70 +#CFLAGS += -fomit-frame-pointer 14.71 + 14.72 +# Dependencies. Gcc generates them for us. 14.73 +CFLAGS += -Wp,-MD,.$(@F).d 14.74 +VNET_DEP = .*.d 14.75 +#---------------------------------------------------------------------------- 14.76 + 14.77 +.PHONY: all 14.78 +all: module 14.79 + 14.80 +.PHONY: module modules 14.81 +module modules: $(KERNEL_MODULE) 14.82 + 14.83 +$(KERNEL_MODULE): $(VNET_OBJ) 14.84 + $(LD) -r -o $@ $^ 14.85 + 14.86 +.PHONY: install install-module modules_install 14.87 +install install-module modules_install: module 14.88 + install -m 0755 -d $(prefix)$(KERNEL_MODULE_DIR) 14.89 + install -m 0554 $(KERNEL_MODULE) $(prefix)$(KERNEL_MODULE_DIR) 14.90 + 14.91 +TAGS: 14.92 + etags *.c *.h 14.93 + 14.94 +.PHONY: clean 14.95 +clean: 14.96 + @rm -f *.a *.o *.ko *~ 14.97 + @rm -f $(VNET_DEP) .*.cmd *.mod.? 14.98 + @rm -rf .tmp_versions 14.99 + 14.100 +-include $(VNET_DEP)
15.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 15.2 +++ b/tools/vnet/vnet-module/Makefile-2.6 Mon Nov 22 16:49:15 2004 +0000 15.3 @@ -0,0 +1,51 @@ 15.4 +# -*- mode: Makefile; -*- 15.5 +#============================================================================ 15.6 +# 15.7 +# Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 15.8 +# 15.9 +# This program is free software; you can redistribute it and/or modify 15.10 +# it under the terms of the GNU General Public License as published by the 15.11 +# Free Software Foundation; either version 2 of the License, or (at your 15.12 +# option) any later version. 15.13 +# 15.14 +# This program is distributed in the hope that it will be useful, but 15.15 +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15.16 +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15.17 +# for more details. 15.18 +# 15.19 +# You should have received a copy of the GNU General Public License along 15.20 +# with this program; if not, write to the Free software Foundation, Inc., 15.21 +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 15.22 +#============================================================================ 15.23 + 15.24 +#============================================================================ 15.25 +# Vnet module makefile for 2.6 series kernels. 
15.26 + 15.27 +LINUX_RELEASE ?= 2.6 15.28 +include Makefile.ver 15.29 + 15.30 +KERNEL_MODULE = vnet_module.ko 15.31 + 15.32 +#---------------------------------------------------------------------------- 15.33 +#export KBUILD_VERBOSE=1 15.34 + 15.35 +.PHONY: all 15.36 +all: module 15.37 + 15.38 +.PHONY: module modules 15.39 +module modules: 15.40 + $(MAKE) -C $(KERNEL_SRC) M=`pwd` modules 15.41 + 15.42 +.PHONY: install install-module modules_install 15.43 +install install-module modules_install: module 15.44 + install -m 0755 -d $(prefix)$(KERNEL_MODULE_DIR) 15.45 + install -m 0554 $(KERNEL_MODULE) $(prefix)$(KERNEL_MODULE_DIR) 15.46 +# Use `pwd`, as the module rule does: $(PWD) comes from the environment and is stale when make was entered with -C. 15.47 +.PHONY: clean 15.48 +clean: 15.49 + @$(MAKE) -C $(KERNEL_SRC) M=`pwd` clean 15.50 + @rm -f *.a *.o *.ko *~ .*.d .*.cmd *.mod.? 15.51 + 15.52 +TAGS: 15.53 + etags *.c *.h 15.54 +
16.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 16.2 +++ b/tools/vnet/vnet-module/Makefile.ver Mon Nov 22 16:49:15 2004 +0000 16.3 @@ -0,0 +1,49 @@ 16.4 +# -*- mode: Makefile; -*- 16.5 +#============================================================================ 16.6 +# 16.7 +# Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 16.8 +# 16.9 +# This program is free software; you can redistribute it and/or modify 16.10 +# it under the terms of the GNU General Public License as published by the 16.11 +# Free Software Foundation; either version 2 of the License, or (at your 16.12 +# option) any later version. 16.13 +# 16.14 +# This program is distributed in the hope that it will be useful, but 16.15 +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16.16 +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16.17 +# for more details. 16.18 +# 16.19 +# You should have received a copy of the GNU General Public License along 16.20 +# with this program; if not, write to the Free software Foundation, Inc., 16.21 +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 16.22 +#============================================================================ 16.23 + 16.24 +#---------------------------------------------------------------------------- 16.25 +# Xeno/xen. 16.26 + 16.27 +# Root of xen tree. 16.28 +XEN_ROOT ?=../../.. 16.29 + 16.30 +# Path to relativize the install. Set to / 16.31 +# to install relative to filesystem root. 16.32 +prefix ?=$(XEN_ROOT)/install/ 16.33 +#---------------------------------------------------------------------------- 16.34 + 16.35 +LINUX_RELEASE ?=2.6 16.36 +KERNEL_MINOR ?=-xen0 16.37 + 16.38 +LINUX_VERSION ?= $(shell ( /bin/ls -ld $(XEN_ROOT)/linux-$(LINUX_RELEASE).*-xen-sparse ) 2>/dev/null | \ 16.39 + sed -e 's!^.*linux-\(.\+\)-xen-sparse!\1!' 
) 16.40 + 16.41 +ifeq ($(LINUX_VERSION),) 16.42 +$(error Kernel source for linux $(LINUX_RELEASE) not found) 16.43 +endif 16.44 + 16.45 +KERNEL_VERSION =$(LINUX_VERSION)$(KERNEL_MINOR) 16.46 + 16.47 +KERNEL_SRC ?= $(XEN_ROOT)/linux-$(KERNEL_VERSION) 16.48 + 16.49 +KERNEL_MODULE_DIR = /lib/modules/$(KERNEL_VERSION)/kernel 16.50 + 16.51 +#$(warning KERNEL_VERSION $(KERNEL_VERSION)) 16.52 +#$(warning KERNEL_SRC $(KERNEL_SRC))
17.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 17.2 +++ b/tools/vnet/vnet-module/Makefile.vnet Mon Nov 22 16:49:15 2004 +0000 17.3 @@ -0,0 +1,57 @@ 17.4 +# -*- mode: Makefile; -*- 17.5 +#============================================================================ 17.6 +# 17.7 +# Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 17.8 +# 17.9 +# This program is free software; you can redistribute it and/or modify 17.10 +# it under the terms of the GNU General Public License as published by the 17.11 +# Free Software Foundation; either version 2 of the License, or (at your 17.12 +# option) any later version. 17.13 +# 17.14 +# This program is distributed in the hope that it will be useful, but 17.15 +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 17.16 +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17.17 +# for more details. 17.18 +# 17.19 +# You should have received a copy of the GNU General Public License along 17.20 +# with this program; if not, write to the Free software Foundation, Inc., 17.21 +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 17.22 +#============================================================================ 17.23 + 17.24 +ifeq ($(src),) 17.25 +SRC_DIR= 17.26 +else 17.27 +SRC_DIR=$(src)/ 17.28 +endif 17.29 + 17.30 +LIB_DIR := $(SRC_DIR)../../libxutil 17.31 + 17.32 +VNET_SRC := 17.33 +VNET_SRC += esp.c 17.34 +VNET_SRC += etherip.c 17.35 +VNET_SRC += random.c 17.36 +VNET_SRC += sa_algorithm.c 17.37 +VNET_SRC += sa.c 17.38 +VNET_SRC += skb_context.c 17.39 +VNET_SRC += skb_util.c 17.40 +VNET_SRC += tunnel.c 17.41 +VNET_SRC += varp.c 17.42 +VNET_SRC += varp_socket.c 17.43 +VNET_SRC += vif.c 17.44 +VNET_SRC += vnet.c 17.45 +VNET_SRC += vnet_dev.c 17.46 +VNET_SRC += vnet_ioctl.c 17.47 + 17.48 +VNET_LIB_SRC += allocate.c 17.49 +VNET_LIB_SRC += enum.c 17.50 +VNET_LIB_SRC += hash_table.c 17.51 +VNET_LIB_SRC += iostream.c 17.52 +VNET_LIB_SRC += kernel_stream.c 17.53 +VNET_LIB_SRC += sxpr.c 
17.54 +VNET_LIB_SRC += sxpr_parser.c 17.55 +VNET_LIB_SRC += sys_net.c 17.56 +VNET_LIB_SRC += sys_string.c 17.57 + 17.58 +VNET_OBJ := $(VNET_SRC:.c=.o) 17.59 +VNET_LIB_OBJ := $(VNET_LIB_SRC:.c=.o) 17.60 +
18.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 18.2 +++ b/tools/vnet/vnet-module/esp.c Mon Nov 22 16:49:15 2004 +0000 18.3 @@ -0,0 +1,863 @@ 18.4 +/* 18.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 18.6 + * 18.7 + * This program is free software; you can redistribute it and/or modify 18.8 + * it under the terms of the GNU General Public License as published by the 18.9 + * Free Software Foundation; either version 2 of the License, or (at your 18.10 + * option) any later version. 18.11 + * 18.12 + * This program is distributed in the hope that it will be useful, but 18.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 18.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 18.15 + * for more details. 18.16 + * 18.17 + * You should have received a copy of the GNU General Public License along 18.18 + * with this program; if not, write to the Free software Foundation, Inc., 18.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 18.20 + * 18.21 + */ 18.22 +#include <linux/config.h> 18.23 +#include <linux/module.h> 18.24 +#include <linux/types.h> 18.25 +#include <linux/sched.h> 18.26 +#include <linux/kernel.h> 18.27 +#include <asm/uaccess.h> 18.28 + 18.29 +#include <linux/init.h> 18.30 + 18.31 +#include <linux/version.h> 18.32 + 18.33 +#include <linux/skbuff.h> 18.34 +#include <linux/netdevice.h> 18.35 +#include <linux/net.h> 18.36 +#include <linux/in.h> 18.37 +#include <linux/inet.h> 18.38 + 18.39 +#include <net/ip.h> 18.40 +#include <net/protocol.h> 18.41 +#include <net/route.h> 18.42 + 18.43 +#include <linux/if_ether.h> 18.44 +#include <linux/icmp.h> 18.45 + 18.46 +#include <asm/scatterlist.h> 18.47 +#include <linux/crypto.h> 18.48 +#include <linux/pfkeyv2.h> 18.49 +#include <linux/random.h> 18.50 + 18.51 +#include <esp.h> 18.52 +#include <sa.h> 18.53 +#include <sa_algorithm.h> 18.54 +#include <tunnel.h> 18.55 +#include <vnet.h> 18.56 +#include <skb_util.h> 18.57 + 18.58 +static const int 
DEBUG_ICV = 0; 18.59 + 18.60 +#define MODULE_NAME "IPSEC" 18.61 +#define DEBUG 1 18.62 +#undef DEBUG 18.63 +#include "debug.h" 18.64 + 18.65 +/* Outgoing packet: [ eth | ip | data ] 18.66 + * After etherip: [ eth2 | ip2 | ethip | eth | ip | data ] 18.67 + * After esp : [ eth2 | ip2 | esp | {ethip | eth | ip | data} | pad | icv ] 18.68 + * ^ + 18.69 + * The curly braces { ... } denote encryption. 18.70 + * The esp header includes the fixed esp headers and the iv (variable size). 18.71 + * The point marked ^ does not move. To the left is in the header, to the right 18.72 + * is in the frag. Remember that all outgoing skbs (from domains) have 1 frag. 18.73 + * Data after + is added by esp, using an extra frag. 18.74 + * 18.75 + * Incoming as above. 18.76 + * After decrypt: [ eth2 | ip2 | esp | ethip | eth | ip | data | pad | icv ] 18.77 + * Trim tail: [ eth2 | ip2 | esp | ethip | eth | ip | data ] 18.78 + * Drop hdr: [ eth2 | ip2 | ethip | eth | ip | data ] 18.79 + * ^ 18.80 + * The point marked ^ does not move. Incoming skbs are linear (no frags). 18.81 + * The tail is trimmed by adjusting skb->tail and len. 18.82 + * The esp hdr is dropped by using memmove to move the headers and 18.83 + * adjusting the skb pointers. 18.84 + * 18.85 + * todo: Now this code is in linux we can't assume 1 frag for outbound skbs, 18.86 + * or (maybe) that memmove is safe on inbound. 18.87 + */ 18.88 + 18.89 +/** Round n up to a multiple of block. 18.90 + * If block is less than 2 does nothing. 18.91 + * Otherwise assume block is a power of 2. 18.92 + * 18.93 + * @param n to round up 18.94 + * @param block size to round to a multiple of 18.95 + * @return rounded value 18.96 + */ 18.97 +static inline int roundup(int n, int block){ 18.98 + if(block <= 1) return n; 18.99 + block--; 18.100 + return (n + block) & ~block; 18.101 +} 18.102 + 18.103 +/** Check if n is a multiple of block. 18.104 + * If block is less than 2 returns 1. 18.105 + * Otherwise assumes block is a power of 2. 
18.106 + * 18.107 + * @param n to check 18.108 + * @param block block size 18.109 + * @return 1 if a multiple, 0 otherwise 18.110 + */ 18.111 +static inline int multipleof(int n, int block){ 18.112 + if(block <= 1) return 1; 18.113 + block--; 18.114 + return !(n & block); 18.115 +} 18.116 + 18.117 +/** Convert from bits to bytes. 18.118 + * 18.119 + * @param n number of bits 18.120 + * @return number of bytes 18.121 + */ 18.122 +static inline int bits_to_bytes(int n){ 18.123 + return n / 8; 18.124 +} 18.125 + 18.126 + 18.127 +/** Insert esp padding at the end of an skb. 18.128 + * Inserts padding bytes, number of padding bytes, protocol number. 18.129 + * 18.130 + * @param skb skb 18.131 + * @param offset offset from skb end to where padding should end 18.132 + * @param extra_n total amount of padding 18.133 + * @param protocol protocol number (from original ip hdr) 18.134 + * @return 0 on success, error code otherwise 18.135 + */ 18.136 +static int esp_sa_pad(struct sk_buff *skb, int offset, int extra_n, 18.137 + unsigned char protocol){ 18.138 + int err; 18.139 + char *data; 18.140 + int pad_n = extra_n - ESP_PAD_N; 18.141 + int i; 18.142 + char buf[extra_n]; 18.143 + 18.144 + data = buf; 18.145 + for(i = 1; i <= pad_n; i++){ 18.146 + *data++ = i; 18.147 + } 18.148 + *data++ = pad_n; 18.149 + *data++ = protocol; 18.150 + err = skb_put_bits(skb, skb->len - offset - extra_n, buf, extra_n); 18.151 + return err; 18.152 +} 18.153 + 18.154 +/** Encrypt skb. Skips esp header and iv. 18.155 + * Assumes skb->data points at esp header. 
18.156 + * 18.157 + * @param esp esp state 18.158 + * @param esph esp header 18.159 + * @param skb packet 18.160 + * @param head_n size of esp header and iv 18.161 + * @param iv_n size of iv 18.162 + * @param text_n size of ciphertext 18.163 + * @return 0 on success, error code otherwise 18.164 + */ 18.165 +static int esp_sa_encrypt(ESPState *esp, ESPHdr *esph, struct sk_buff *skb, 18.166 + int head_n, int iv_n, int text_n){ 18.167 + int err = 0; 18.168 + int sg_n = skb_shinfo(skb)->nr_frags + 1; 18.169 + struct scatterlist sg[sg_n]; 18.170 + 18.171 + err = skb_scatterlist(skb, sg, &sg_n, head_n, text_n); 18.172 + if(err) goto exit; 18.173 + if(iv_n){ 18.174 + crypto_cipher_set_iv(esp->cipher.tfm, esp->cipher.iv, iv_n); 18.175 + } 18.176 + crypto_cipher_encrypt(esp->cipher.tfm, sg, sg, text_n); 18.177 + if(iv_n){ 18.178 + memcpy(esph->data, esp->cipher.iv, iv_n); 18.179 + crypto_cipher_get_iv(esp->cipher.tfm, esp->cipher.iv, iv_n); 18.180 + } 18.181 + exit: 18.182 + return err; 18.183 +} 18.184 + 18.185 +/** Decrypt skb. Skips esp header and iv. 18.186 + * Assumes skb->data points at esp header. 18.187 + * 18.188 + * @param esp esp state 18.189 + * @param esph esp header 18.190 + * @param skb packet 18.191 + * @param head_n size of esp header and iv 18.192 + * @param iv_n size of iv 18.193 + * @param text_n size of ciphertext 18.194 + * @return 0 on success, error code otherwise 18.195 + */ 18.196 +static int esp_sa_decrypt(ESPState *esp, ESPHdr *esph, struct sk_buff *skb, 18.197 + int head_n, int iv_n, int text_n){ 18.198 + int err = 0; 18.199 + int sg_n = skb_shinfo(skb)->nr_frags + 1; 18.200 + struct scatterlist sg[sg_n]; 18.201 + 18.202 + err = skb_scatterlist(skb, sg, &sg_n, head_n, text_n); 18.203 + if(err) goto exit; 18.204 + if(iv_n){ 18.205 + crypto_cipher_set_iv(esp->cipher.tfm, esph->data, iv_n); 18.206 + } 18.207 + crypto_cipher_decrypt(esp->cipher.tfm, sg, sg, text_n); 18.208 + exit: 18.209 + return err; 18.210 +} 18.211 + 18.212 +/** Compute icv. 
Includes esp header, iv and ciphertext. 18.213 + * Assumes skb->data points at esp header. 18.214 + * 18.215 + * @param esp esp state 18.216 + * @param skb packet 18.217 + * @param digest_n number of bytes to digest 18.218 + * @param icv_n size of icv 18.219 + * @return 0 on success, error code otherwise 18.220 + */ 18.221 +static int esp_sa_digest(ESPState *esp, struct sk_buff *skb, int digest_n, int icv_n){ 18.222 + int err = 0; 18.223 + u8 icv[icv_n]; 18.224 + 18.225 + if(DEBUG_ICV){ 18.226 + dprintf("> skb digest_n=%d icv_n=%d\n", digest_n, icv_n); 18.227 + skb_print_bits(skb, 0, digest_n); 18.228 + } 18.229 + memset(icv, 0, icv_n); 18.230 + esp->digest.icv(esp, skb, 0, digest_n, icv); 18.231 + skb_put_bits(skb, digest_n, icv, icv_n); 18.232 + return err; 18.233 +} 18.234 + 18.235 +/** Check the icv and trim it from the skb tail. 18.236 + * 18.237 + * @param sa sa state 18.238 + * @param esp esp state 18.239 + * @param esph esp header 18.240 + * @param skb packet 18.241 + * @return 0 on success, error code otherwise 18.242 + */ 18.243 +static int esp_check_icv(SAState *sa, ESPState *esp, ESPHdr *esph, struct sk_buff *skb){ 18.244 + int err = 0; 18.245 + int icv_n = esp->digest.icv_n; 18.246 + int digest_n = skb->len - icv_n; 18.247 + u8 icv_skb[icv_n]; 18.248 + u8 icv_new[icv_n]; 18.249 + 18.250 + dprintf(">\n"); 18.251 + if(DEBUG_ICV){ 18.252 + dprintf("> skb len=%d digest_n=%d icv_n=%d\n", 18.253 + skb->len, digest_n, icv_n); 18.254 + skb_print_bits(skb, 0, skb->len); 18.255 + } 18.256 + if(skb_copy_bits(skb, digest_n, icv_skb, icv_n)){ 18.257 + wprintf("> Error getting icv from skb\n"); 18.258 + goto exit; 18.259 + } 18.260 + esp->digest.icv(esp, skb, 0, digest_n, icv_new); 18.261 + if(DEBUG_ICV){ 18.262 + dprintf("> len=%d icv_n=%d", digest_n, icv_n); 18.263 + printk("\nskb="); buf_print(icv_skb, icv_n); 18.264 + printk("new="); buf_print(icv_new, icv_n); 18.265 + } 18.266 + if(unlikely(memcmp(icv_new, icv_skb, icv_n))){ 18.267 + wprintf("> ICV check 
failed!\n"); 18.268 + err = -EINVAL; 18.269 + sa->counts.integrity_failures++; 18.270 + goto exit; 18.271 + } 18.272 + skb_trim_tail(skb, icv_n); 18.273 + exit: 18.274 + dprintf("< err=%d\n", err); 18.275 + return err; 18.276 +} 18.277 + 18.278 +/** Send a packet via an ESP SA. 18.279 + * 18.280 + * @param sa SA state 18.281 + * @param skb packet to send 18.282 + * @param tunnel underlying tunnel 18.283 + * @return 0 on success, negative error code otherwise 18.284 + */ 18.285 +static int esp_sa_send(SAState *sa, struct sk_buff *skb, Tunnel *tunnel){ 18.286 + int err = 0; 18.287 + int ip_n; // Size of ip header. 18.288 + int plaintext_n; // Size of plaintext. 18.289 + int ciphertext_n; // Size of ciphertext (including padding). 18.290 + int extra_n; // Extra bytes needed for ciphertext. 18.291 + int icv_n = 0; // Size of integrity check value (icv). 18.292 + int iv_n = 0; // Size of initialization vector (iv). 18.293 + int head_n; // Size of esp header and iv. 18.294 + int tail_n; // Size of esp trailer: padding and icv. 18.295 + ESPState *esp; 18.296 + ESPHdr *esph; 18.297 + 18.298 + dprintf(">\n"); 18.299 + esp = sa->data; 18.300 + ip_n = (skb->nh.iph->ihl << 2); 18.301 + // Assuming skb->data points at ethernet header, exclude ethernet 18.302 + // header and IP header. 18.303 + plaintext_n = skb->len - ETH_HLEN - ip_n; 18.304 + // Add size of padding fields. 
18.305 + ciphertext_n = roundup(plaintext_n + ESP_PAD_N, esp->cipher.block_n); 18.306 + if(esp->cipher.pad_n > 0){ 18.307 + ciphertext_n = roundup(ciphertext_n, esp->cipher.pad_n); 18.308 + } 18.309 + extra_n = ciphertext_n - plaintext_n; 18.310 + iv_n = esp->cipher.iv_n; 18.311 + icv_n = esp->digest.icv_n; 18.312 + dprintf("> len=%d plaintext=%d ciphertext=%d extra=%d\n", 18.313 + skb->len, plaintext_n, ciphertext_n, extra_n); 18.314 + dprintf("> iv=%d icv=%d\n", iv_n, icv_n); 18.315 + skb_print_bits(skb, 0, skb->len); 18.316 + 18.317 + // Add headroom for esp header and iv, tailroom for the ciphertext 18.318 + // and icv. 18.319 + head_n = ESP_HDR_N + iv_n; 18.320 + tail_n = extra_n + icv_n; 18.321 + err = skb_make_room(&skb, skb, head_n, tail_n); 18.322 + if(err) goto exit; 18.323 + dprintf("> skb=%p\n", skb); 18.324 + // Move the headers up to make space for the esp header. We can 18.325 + // use memmove() since all this data fits in the skb head. 18.326 + // todo: Can't assume this anymore? 18.327 + dprintf("> header push...\n"); 18.328 + __skb_push(skb, head_n); 18.329 + if(0 && skb->mac.raw){ 18.330 + dprintf("> skb->mac=%p\n", skb->mac.raw); 18.331 + dprintf("> ETH header pull...\n"); 18.332 + memmove(skb->data, skb->mac.raw, ETH_HLEN); 18.333 + skb->mac.raw = skb->data; 18.334 + __skb_pull(skb, ETH_HLEN); 18.335 + } 18.336 + dprintf("> IP header pull...\n"); 18.337 + memmove(skb->data, skb->nh.raw, ip_n); 18.338 + skb->nh.raw = skb->data; 18.339 + __skb_pull(skb, ip_n); 18.340 + esph = (void*)skb->data; 18.341 + // Add spi and sequence number. 18.342 + esph->spi = sa->ident.spi; 18.343 + esph->seq = htonl(++sa->replay.send_seq); 18.344 + // Insert the padding bytes: extra bytes less the pad fields 18.345 + // themselves. 
18.346 + dprintf("> esp_sa_pad ...\n"); 18.347 + esp_sa_pad(skb, icv_n, extra_n, skb->nh.iph->protocol); 18.348 + if(sa->security & SA_CONF){ 18.349 + dprintf("> esp_sa_encrypt...\n"); 18.350 + err = esp_sa_encrypt(esp, esph, skb, head_n, iv_n, ciphertext_n); 18.351 + if(err) goto exit; 18.352 + } 18.353 + if(icv_n){ 18.354 + dprintf("> esp_sa_digest...\n"); 18.355 + err = esp_sa_digest(esp, skb, head_n + ciphertext_n, icv_n); 18.356 + if(err) goto exit; 18.357 + } 18.358 + dprintf("> IP header push...\n"); 18.359 + __skb_push(skb, ip_n); 18.360 + if(0 && skb->mac.raw){ 18.361 + dprintf("> ETH header push...\n"); 18.362 + __skb_push(skb, ETH_HLEN); 18.363 + } 18.364 + // Fix ip header. Adjust length field, set protocol, zero 18.365 + // checksum. 18.366 + { 18.367 + // Total packet length (bytes). 18.368 + int tot_len = ntohs(skb->nh.iph->tot_len); 18.369 + tot_len += head_n; 18.370 + tot_len += tail_n; 18.371 + skb->nh.iph->protocol = IPPROTO_ESP; 18.372 + skb->nh.iph->tot_len = htons(tot_len); 18.373 + skb->nh.iph->check = 0; 18.374 + } 18.375 + err = Tunnel_send(tunnel, skb); 18.376 + exit: 18.377 + dprintf("< err=%d\n", err); 18.378 + return err; 18.379 +} 18.380 + 18.381 +/** Release an skb context. 18.382 + * Drops the refcount on the SA. 18.383 + * 18.384 + * @param context to free 18.385 + */ 18.386 +static void esp_context_free_fn(SkbContext *context){ 18.387 + SAState *sa; 18.388 + if(!context) return; 18.389 + sa = context->data; 18.390 + if(!sa) return; 18.391 + context->data = NULL; 18.392 + SAState_decref(sa); 18.393 +} 18.394 + 18.395 +/** Receive a packet via an ESP SA. 18.396 + * Does ESP receive processing (check icv, decrypt), strips 18.397 + * ESP header and re-receives. 
18.398 + * 18.399 + * @param sa SA 18.400 + * @param skb packet 18.401 + * @return 0 on success, negative error code otherwise 18.402 + */ 18.403 +static int esp_sa_recv(SAState *sa, struct sk_buff *skb){ 18.404 + int err = -EINVAL; 18.405 + int mine = 0; 18.406 + int vnet = 0; //todo: fixme - need to record skb vnet somewhere 18.407 + ESPState *esp; 18.408 + ESPHdr *esph; 18.409 + ESPPadding *pad; 18.410 + int block_n; // Cipher blocksize. 18.411 + int icv_n; // Size of integrity check value (icv). 18.412 + int iv_n; // Size of initialization vector (iv). 18.413 + int text_n; // Size of text (ciphertext or plaintext). 18.414 + int head_n; // Size of esp header and iv. 18.415 + 18.416 + dprintf("> skb=%p\n", skb); 18.417 + // Assumes skb->data points at esp hdr. 18.418 + esph = (void*)skb->data; 18.419 + esp = sa->data; 18.420 + block_n = crypto_tfm_alg_blocksize(esp->cipher.tfm); 18.421 + icv_n = esp->digest.icv_n; 18.422 + iv_n = esp->cipher.iv_n; 18.423 + head_n = ESP_HDR_N + iv_n; 18.424 + text_n = skb->len - head_n - icv_n; 18.425 + if(text_n < ESP_PAD_N || !multipleof(text_n, block_n)){ 18.426 + wprintf("> Invalid size: text_n=%d tfm:block_n=%d esp:block_n=%d\n", 18.427 + text_n, block_n, esp->cipher.block_n); 18.428 + goto exit; 18.429 + } 18.430 + if(icv_n){ 18.431 + err = esp_check_icv(sa, esp, esph, skb); 18.432 + if(err) goto exit; 18.433 + } 18.434 + mine = 1; 18.435 + if(sa->security & SA_CONF){ 18.436 + err = esp_sa_decrypt(esp, esph, skb, head_n, iv_n, text_n); 18.437 + if(err) goto exit; 18.438 + } 18.439 + // Strip esp header by moving the other headers down. 18.440 + //todo Maybe not safe to do this anymore. 18.441 + memmove(skb->mac.raw + head_n, skb->mac.raw, (skb->data - skb->mac.raw)); 18.442 + skb->mac.raw += head_n; 18.443 + skb->nh.raw += head_n; 18.444 + // Move skb->data back to ethernet header. 18.445 + // Do in 2 moves to ensure offsets are +ve, 18.446 + // since args to skb_pull/skb_push are unsigned. 
18.447 + __skb_pull(skb, head_n); 18.448 + __skb_push(skb, skb->data - skb->mac.raw); 18.449 + // After this esph is invalid. 18.450 + esph = NULL; 18.451 + // Trim padding, restore protocol in IP header. 18.452 + pad = skb_trim_tail(skb, ESP_PAD_N); 18.453 + text_n -= ESP_PAD_N; 18.454 + if((pad->pad_n > 255) | (pad->pad_n > text_n)){ 18.455 + wprintf("> Invalid padding: pad_n=%d text_n=%d\n", pad->pad_n, text_n); 18.456 + goto exit; 18.457 + } 18.458 + skb_trim_tail(skb, pad->pad_n); 18.459 + skb->nh.iph->protocol = pad->protocol; 18.460 + err = skb_push_context(skb, vnet, sa->ident.addr, IPPROTO_ESP, 18.461 + sa, esp_context_free_fn); 18.462 + if(err) goto exit; 18.463 + // Increase sa refcount now the skb context refers to it. 18.464 + SAState_incref(sa); 18.465 + err = netif_rx(skb); 18.466 + exit: 18.467 + if(mine) err = 1; 18.468 + dprintf("< skb=%p err=%d\n", skb, err); 18.469 + return err; 18.470 +} 18.471 + 18.472 +/** Estimate the packet size for some data using ESP processing. 18.473 + * 18.474 + * @param sa ESP SA 18.475 + * @param data_n data size 18.476 + * @return size after ESP processing 18.477 + */ 18.478 +static u32 esp_sa_size(SAState *sa, int data_n){ 18.479 + // Even in transport mode have to round up to blocksize. 18.480 + // Have to add some padding for alignment even if pad_n is zero. 18.481 + ESPState *esp = sa->data; 18.482 + 18.483 + data_n = roundup(data_n + ESP_PAD_N, esp->cipher.block_n); 18.484 + if(esp->cipher.pad_n > 0){ 18.485 + data_n = roundup(data_n, esp->cipher.pad_n); 18.486 + } 18.487 + data_n += esp->digest.icv_n; 18.488 + //data_n += esp->cipher.iv_n; 18.489 + data_n += ESP_HDR_N; 18.490 + return data_n; 18.491 +} 18.492 + 18.493 +/** Compute an icv using HMAC digest. 
18.494 + * 18.495 + * @param esp ESP state 18.496 + * @param skb packet to digest 18.497 + * @param offset offset to start at 18.498 + * @param len number of bytes to digest 18.499 + * @param icv return parameter for ICV 18.500 + * @return nothing; icv receives the digest (a skb_scatterlist error is not reported) 18.501 + */ 18.502 +static inline void esp_hmac_digest(ESPState *esp, struct sk_buff *skb, 18.503 + int offset, int len, u8 *icv){ 18.504 + int err = 0; 18.505 + struct crypto_tfm *digest = esp->digest.tfm; 18.506 + char *icv_tmp = esp->digest.icv_tmp; 18.507 + int sg_n = skb_shinfo(skb)->nr_frags + 1; 18.508 + struct scatterlist sg[sg_n]; 18.509 + 18.510 + dprintf("> offset=%d len=%d\n", offset, len); 18.511 + memset(icv, 0, esp->digest.icv_n); 18.512 + if(DEBUG_ICV){ 18.513 + dprintf("> key len=%d\n", esp->digest.key_n); 18.514 + printk("\nkey="); 18.515 + buf_print(esp->digest.key,esp->digest.key_n); 18.516 + } 18.517 + crypto_hmac_init(digest, esp->digest.key, &esp->digest.key_n); 18.518 + err = skb_scatterlist(skb, sg, &sg_n, offset, len); 18.519 + crypto_hmac_update(digest, sg, sg_n); 18.520 + crypto_hmac_final(digest, esp->digest.key, &esp->digest.key_n, icv_tmp); 18.521 + if(DEBUG_ICV){ 18.522 + dprintf("> digest len=%d ", esp->digest.icv_n); 18.523 + printk("\nval="); 18.524 + buf_print(icv_tmp, esp->digest.icv_n); 18.525 + } 18.526 + memcpy(icv, icv_tmp, esp->digest.icv_n); 18.527 + dprintf("<\n"); 18.528 +} 18.529 + 18.530 +/** Finish up an esp state. 18.531 + * Releases the digest, cipher, iv and frees the state. 
18.532 + * 18.533 + * @parma esp state 18.534 + */ 18.535 +static void esp_fini(ESPState *esp){ 18.536 + if(!esp) return; 18.537 + if(esp->digest.tfm){ 18.538 + crypto_free_tfm(esp->digest.tfm); 18.539 + esp->digest.tfm = NULL; 18.540 + } 18.541 + if(esp->digest.icv_tmp){ 18.542 + kfree(esp->digest.icv_tmp); 18.543 + esp->digest.icv_tmp = NULL; 18.544 + } 18.545 + if(esp->cipher.tfm){ 18.546 + crypto_free_tfm(esp->cipher.tfm); 18.547 + esp->cipher.tfm = NULL; 18.548 + } 18.549 + if(esp->cipher.iv){ 18.550 + kfree(esp->cipher.iv); 18.551 + esp->cipher.iv = NULL; 18.552 + } 18.553 + kfree(esp); 18.554 +} 18.555 + 18.556 +/** Release an ESP SA. 18.557 + * 18.558 + * @param sa ESO SA 18.559 + */ 18.560 +static void esp_sa_fini(SAState *sa){ 18.561 + ESPState *esp; 18.562 + if(!sa) return; 18.563 + esp = sa->data; 18.564 + if(!esp) return; 18.565 + esp_fini(esp); 18.566 + sa->data = NULL; 18.567 +} 18.568 + 18.569 +/** Initialize the cipher for an ESP SA. 18.570 + * 18.571 + * @param sa ESP SA 18.572 + * @param esp ESP state 18.573 + * @return 0 on success, negative error code otherwise 18.574 + */ 18.575 +static int esp_cipher_init(SAState *sa, ESPState *esp){ 18.576 + int err = 0; 18.577 + SAAlgorithm *algo = NULL; 18.578 + int cipher_mode = CRYPTO_TFM_MODE_CBC; 18.579 + 18.580 + dprintf("> sa=%p esp=%p\n", sa, esp); 18.581 + dprintf("> cipher=%s\n", sa->cipher.name); 18.582 + algo = sa_cipher_by_name(sa->cipher.name); 18.583 + if(!algo){ 18.584 + wprintf("> Cipher unavailable: %s\n", sa->cipher.name); 18.585 + err = -EINVAL; 18.586 + goto exit; 18.587 + } 18.588 + esp->cipher.key_n = roundup(sa->cipher.bits, 8); 18.589 + // If cipher is null must use ECB because CBC algo does not support blocksize 1. 
18.590 + if(strcmp(sa->cipher.name, "cipher_null")){ 18.591 + cipher_mode = CRYPTO_TFM_MODE_ECB; 18.592 + } 18.593 + esp->cipher.tfm = crypto_alloc_tfm(sa->cipher.name, cipher_mode); 18.594 + if(!esp->cipher.tfm){ 18.595 + err = -ENOMEM; 18.596 + goto exit; 18.597 + } 18.598 + esp->cipher.block_n = roundup(crypto_tfm_alg_blocksize(esp->cipher.tfm), 4); 18.599 + esp->cipher.iv_n = crypto_tfm_alg_ivsize(esp->cipher.tfm); 18.600 + esp->cipher.pad_n = 0; 18.601 + if(esp->cipher.iv_n){ 18.602 + esp->cipher.iv = kmalloc(esp->cipher.iv_n, GFP_KERNEL); 18.603 + get_random_bytes(esp->cipher.iv, esp->cipher.iv_n); 18.604 + } 18.605 + crypto_cipher_setkey(esp->cipher.tfm, esp->cipher.key, esp->cipher.key_n); 18.606 + err = 0; 18.607 + exit: 18.608 + dprintf("< err=%d\n", err); 18.609 + return err; 18.610 +} 18.611 + 18.612 +/** Initialize the digest for an ESP SA. 18.613 + * 18.614 + * @param sa ESP SA 18.615 + * @param esp ESP state 18.616 + * @return 0 on success, negative error code otherwise 18.617 + */ 18.618 +static int esp_digest_init(SAState *sa, ESPState *esp){ 18.619 + int err = 0; 18.620 + SAAlgorithm *algo = NULL; 18.621 + 18.622 + dprintf(">\n"); 18.623 + esp->digest.key = sa->digest.key; 18.624 + esp->digest.key_n = bits_to_bytes(roundup(sa->digest.bits, 8)); 18.625 + esp->digest.tfm = crypto_alloc_tfm(sa->digest.name, 0); 18.626 + if(!esp->digest.tfm){ 18.627 + err = -ENOMEM; 18.628 + goto exit; 18.629 + } 18.630 + algo = sa_digest_by_name(sa->digest.name); 18.631 + if(!algo){ 18.632 + wprintf("> Digest unavailable: %s\n", sa->digest.name); 18.633 + err = -EINVAL; 18.634 + goto exit; 18.635 + } 18.636 + esp->digest.icv = esp_hmac_digest; 18.637 + esp->digest.icv_full_n = bits_to_bytes(algo->info.digest.icv_fullbits); 18.638 + esp->digest.icv_n = bits_to_bytes(algo->info.digest.icv_truncbits); 18.639 + 18.640 + if(esp->digest.icv_full_n != crypto_tfm_alg_digestsize(esp->digest.tfm)){ 18.641 + err = -EINVAL; 18.642 + wprintf("> digest %s, size %u != %hu\n", 
18.643 + sa->digest.name, 18.644 + crypto_tfm_alg_digestsize(esp->digest.tfm), 18.645 + esp->digest.icv_full_n); 18.646 + goto exit; 18.647 + } 18.648 + 18.649 + esp->digest.icv_tmp = kmalloc(esp->digest.icv_full_n, GFP_KERNEL); 18.650 + if(!esp->digest.icv_tmp){ 18.651 + err = -ENOMEM; 18.652 + goto exit; 18.653 + } 18.654 + exit: 18.655 + dprintf("< err=%d\n", err); 18.656 + return err; 18.657 +} 18.658 + 18.659 +/** Initialize an ESP SA. 18.660 + * 18.661 + * @param sa ESP SA 18.662 + * @param args arguments 18.663 + * @return 0 on success, negative error code otherwise 18.664 + */ 18.665 +static int esp_sa_init(SAState *sa, void *args){ 18.666 + int err = 0; 18.667 + ESPState *esp = NULL; 18.668 + 18.669 + dprintf("> sa=%p\n", sa); 18.670 + esp = kmalloc(sizeof(*esp), GFP_KERNEL); 18.671 + if(!esp){ 18.672 + err = -ENOMEM; 18.673 + goto exit; 18.674 + } 18.675 + *esp = (ESPState){}; 18.676 + err = esp_cipher_init(sa, esp); 18.677 + if(err) goto exit; 18.678 + err = esp_digest_init(sa, esp); 18.679 + if(err) goto exit; 18.680 + sa->data = esp; 18.681 + exit: 18.682 + if(err){ 18.683 + if(esp) esp_fini(esp); 18.684 + } 18.685 + dprintf("< err=%d\n", err); 18.686 + return err; 18.687 +} 18.688 + 18.689 +/** SA type for ESP. 18.690 + */ 18.691 +static SAType esp_sa_type = { 18.692 + .name = "ESP", 18.693 + .protocol = IPPROTO_ESP, 18.694 + .init = esp_sa_init, 18.695 + .fini = esp_sa_fini, 18.696 + .size = esp_sa_size, 18.697 + .recv = esp_sa_recv, 18.698 + .send = esp_sa_send 18.699 +}; 18.700 + 18.701 +/** Get the ESP header from a packet. 
18.702 + * 18.703 + * @param skb packet 18.704 + * @param esph return parameter for header 18.705 + * @return 0 on success, negative error code otherwise 18.706 + */ 18.707 +static int esp_skb_header(struct sk_buff *skb, ESPHdr **esph){ 18.708 + int err = 0; 18.709 + if(skb->len < ESP_HDR_N){ 18.710 + err = -EINVAL; 18.711 + goto exit; 18.712 + } 18.713 + *esph = (ESPHdr*)skb->data; 18.714 + exit: 18.715 + return err; 18.716 +} 18.717 + 18.718 +/** Handle an incoming skb with ESP protocol. 18.719 + * 18.720 + * Lookup spi, if state found hand to the state. 18.721 + * If no state, check spi, if ok, create state and pass to it. 18.722 + * If spi not ok, drop. 18.723 + * 18.724 + * @param skb packet 18.725 + * @return 0 on success, negative error code otherwise 18.726 + */ 18.727 +static int esp_protocol_recv(struct sk_buff *skb){ 18.728 + int err = 0; 18.729 + const int eth_n = ETH_HLEN; 18.730 + int ip_n; 18.731 + ESPHdr *esph = NULL; 18.732 + SAState *sa = NULL; 18.733 + u32 addr; 18.734 + 18.735 + dprintf(">\n"); 18.736 + dprintf("> recv skb=\n"); skb_print_bits(skb, 0, skb->len); 18.737 + ip_n = (skb->nh.iph->ihl << 2); 18.738 + if(skb->data == skb->mac.raw){ 18.739 + // skb->data points at ethernet header. 
18.740 + if (!pskb_may_pull(skb, eth_n + ip_n)){ 18.741 + wprintf("> Malformed skb\n"); 18.742 + err = -EINVAL; 18.743 + goto exit; 18.744 + } 18.745 + skb_pull(skb, eth_n + ip_n); 18.746 + } 18.747 + addr = skb->nh.iph->daddr; 18.748 + err = esp_skb_header(skb, &esph); 18.749 + if(err) goto exit; 18.750 + dprintf("> spi=%08x protocol=%d addr=" IPFMT "\n", 18.751 + esph->spi, IPPROTO_ESP, NIPQUAD(addr)); 18.752 + sa = sa_table_lookup_spi(esph->spi, IPPROTO_ESP, addr); 18.753 + if(!sa){ 18.754 + err = vnet_sa_create(esph->spi, IPPROTO_ESP, addr, &sa); 18.755 + if(err) goto exit; 18.756 + } 18.757 + err = SAState_recv(sa, skb); 18.758 + exit: 18.759 + if(sa) SAState_decref(sa); 18.760 + dprintf("< err=%d\n", err); 18.761 + return err; 18.762 +} 18.763 + 18.764 +/** Handle an ICMP error related to ESP. 18.765 + * 18.766 + * @param skb ICMP error packet 18.767 + * @param info 18.768 + */ 18.769 +static void esp_protocol_icmp_err(struct sk_buff *skb, u32 info){ 18.770 + struct iphdr *iph = (struct iphdr*)skb->data; 18.771 + ESPHdr *esph; 18.772 + SAState *sa; 18.773 + 18.774 + dprintf("> ICMP error type=%d code=%d\n", 18.775 + skb->h.icmph->type, skb->h.icmph->code); 18.776 + if(skb->h.icmph->type != ICMP_DEST_UNREACH || 18.777 + skb->h.icmph->code != ICMP_FRAG_NEEDED){ 18.778 + return; 18.779 + } 18.780 + 18.781 + //todo: need to check skb has enough len to do this. 18.782 + esph = (ESPHdr*)(skb->data + (iph->ihl << 2)); 18.783 + sa = sa_table_lookup_spi(esph->spi, IPPROTO_ESP, iph->daddr); 18.784 + if(!sa) return; 18.785 + wprintf("> ICMP unreachable on SA ESP spi=%08x addr=" IPFMT "\n", 18.786 + ntohl(esph->spi), NIPQUAD(iph->daddr)); 18.787 + SAState_decref(sa); 18.788 +} 18.789 + 18.790 +//============================================================================ 18.791 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 18.792 +// Code for 2.6 kernel. 18.793 + 18.794 +/** Protocol handler for ESP. 
18.795 + */ 18.796 +static struct net_protocol esp_protocol = { 18.797 + .handler = esp_protocol_recv, 18.798 + .err_handler = esp_protocol_icmp_err 18.799 +}; 18.800 + 18.801 +static int esp_protocol_add(void){ 18.802 + return inet_add_protocol(&esp_protocol, IPPROTO_ESP); 18.803 +} 18.804 + 18.805 +static int esp_protocol_del(void){ 18.806 + return inet_del_protocol(&esp_protocol, IPPROTO_ESP); 18.807 +} 18.808 + 18.809 +//============================================================================ 18.810 +#else 18.811 +//============================================================================ 18.812 +// Code for 2.4 kernel. 18.813 + 18.814 +/** Protocol handler for ESP. 18.815 + */ 18.816 +static struct inet_protocol esp_protocol = { 18.817 + .name = "ESP", 18.818 + .protocol = IPPROTO_ESP, 18.819 + .handler = esp_protocol_recv, 18.820 + .err_handler = esp_protocol_icmp_err 18.821 +}; 18.822 + 18.823 +static int esp_protocol_add(void){ 18.824 + inet_add_protocol(&esp_protocol); 18.825 + return 0; 18.826 +} 18.827 + 18.828 +static int esp_protocol_del(void){ 18.829 + return inet_del_protocol(&esp_protocol); 18.830 +} 18.831 + 18.832 +#endif 18.833 +//============================================================================ 18.834 + 18.835 + 18.836 +/** Initialize the ESP module. 18.837 + * Registers the ESP protocol and SA type. 18.838 + * 18.839 + * @return 0 on success, negative error code otherwise 18.840 + */ 18.841 +int __init esp_module_init(void){ 18.842 + int err = 0; 18.843 + dprintf(">\n"); 18.844 + err = SAType_add(&esp_sa_type); 18.845 + if(err < 0){ 18.846 + eprintf("> Error adding esp sa type\n"); 18.847 + goto exit; 18.848 + } 18.849 + esp_protocol_add(); 18.850 + exit: 18.851 + dprintf("< err=%d\n", err); 18.852 + return err; 18.853 +} 18.854 + 18.855 +/** Finalize the ESP module. 18.856 + * Deregisters the ESP protocol and SA type. 
18.857 + */ 18.858 +void __exit esp_module_exit(void){ 18.859 + if(esp_protocol_del() < 0){ 18.860 + eprintf("> Error removing esp protocol\n"); 18.861 + } 18.862 + if(SAType_del(&esp_sa_type) < 0){ 18.863 + eprintf("> Error removing esp sa type\n"); 18.864 + } 18.865 +} 18.866 +
19.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 19.2 +++ b/tools/vnet/vnet-module/esp.h Mon Nov 22 16:49:15 2004 +0000 19.3 @@ -0,0 +1,111 @@ 19.4 +/* 19.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 19.6 + * 19.7 + * This program is free software; you can redistribute it and/or modify 19.8 + * it under the terms of the GNU General Public License as published by the 19.9 + * Free Software Foundation; either version 2 of the License, or (at your 19.10 + * option) any later version. 19.11 + * 19.12 + * This program is distributed in the hope that it will be useful, but 19.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 19.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19.15 + * for more details. 19.16 + * 19.17 + * You should have received a copy of the GNU General Public License along 19.18 + * with this program; if not, write to the Free software Foundation, Inc., 19.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 19.20 + * 19.21 + */ 19.22 +#ifndef __VNET_ESP_H__ 19.23 +#define __VNET_ESP_H__ 19.24 + 19.25 +#include <linux/config.h> 19.26 +#include <linux/types.h> 19.27 +#include <linux/crypto.h> 19.28 + 19.29 +/** Header used by IPSEC ESP (Encapsulated Security Payload). */ 19.30 +typedef struct ESPHdr { 19.31 + /** The spi (security parameters index). */ 19.32 + u32 spi; 19.33 + /** Sequence number. */ 19.34 + u32 seq; 19.35 + /* Variable length data (depends on crypto suite). 19.36 + Mind the 64 bit alignment! */ 19.37 + u8 data[0]; 19.38 +} ESPHdr; 19.39 + 19.40 +/** Padding trailer used by IPSEC ESP. 19.41 + * Follows the padding itself with the padding length and the 19.42 + * protocol being encapsulated. 19.43 + */ 19.44 +typedef struct ESPPadding { 19.45 + u8 pad_n; 19.46 + u8 protocol; 19.47 +} ESPPadding; 19.48 + 19.49 +/** Size of the esp header (spi and seq). 
*/ 19.50 +static const int ESP_HDR_N = sizeof(ESPHdr); 19.51 + 19.52 +/** Size of the esp pad and next protocol field. */ 19.53 +static const int ESP_PAD_N = sizeof(ESPPadding); 19.54 + 19.55 +enum { 19.56 + SASTATE_VOID, 19.57 + SASTATE_ACQUIRE, 19.58 + SASTATE_VALID, 19.59 + SASTATE_ERROR, 19.60 + SASTATE_EXPIRED, 19.61 + SASTATE_DEAD, 19.62 +}; 19.63 + 19.64 +struct ESPState; 19.65 + 19.66 +/** A cipher instance. */ 19.67 +typedef struct ESPCipher { 19.68 + /** Cipher key. */ 19.69 + u8 *key; 19.70 + /** Key size (bytes). */ 19.71 + int key_n; 19.72 + /** Initialization vector (IV). */ 19.73 + u8 *iv; 19.74 + /** IV size (bytes). */ 19.75 + int iv_n; 19.76 + /** Block size for padding (bytes). */ 19.77 + int pad_n; 19.78 + /** Cipher block size (bytes). */ 19.79 + int block_n; 19.80 + /** Cipher crypto transform. */ 19.81 + struct crypto_tfm *tfm; 19.82 +} ESPCipher; 19.83 + 19.84 +/** A digest instance. */ 19.85 +typedef struct ESPDigest { 19.86 + /** Digest key. */ 19.87 + u8 *key; 19.88 + /** Key size (bytes) */ 19.89 + int key_n; 19.90 + /** ICV size used (bytes). */ 19.91 + u8 icv_n; 19.92 + /** Full ICV size when computed (bytes). */ 19.93 + u8 icv_full_n; 19.94 + /** Working storage for computing ICV. */ 19.95 + u8 *icv_tmp; 19.96 + /** Function used to compute ICV (e.g. HMAC). */ 19.97 + void (*icv)(struct ESPState *esp, 19.98 + struct sk_buff *skb, 19.99 + int offset, 19.100 + int len, 19.101 + u8 *icv); 19.102 + /** Digest crypto transform (e.g. SHA). */ 19.103 + struct crypto_tfm *tfm; 19.104 +} ESPDigest; 19.105 + 19.106 +typedef struct ESPState { 19.107 + struct ESPCipher cipher; 19.108 + struct ESPDigest digest; 19.109 +} ESPState; 19.110 + 19.111 +extern int esp_module_init(void); 19.112 +extern void esp_module_exit(void); 19.113 + 19.114 +#endif /* !__VNET_ESP_H__ */
20.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 20.2 +++ b/tools/vnet/vnet-module/etherip.c Mon Nov 22 16:49:15 2004 +0000 20.3 @@ -0,0 +1,411 @@ 20.4 +/* 20.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 20.6 + * 20.7 + * This program is free software; you can redistribute it and/or modify 20.8 + * it under the terms of the GNU General Public License as published by the 20.9 + * Free Software Foundation; either version 2 of the License, or (at your 20.10 + * option) any later version. 20.11 + * 20.12 + * This program is distributed in the hope that it will be useful, but 20.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 20.15 + * for more details. 20.16 + * 20.17 + * You should have received a copy of the GNU General Public License along 20.18 + * with this program; if not, write to the Free software Foundation, Inc., 20.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 20.20 + * 20.21 + */ 20.22 +#include <linux/config.h> 20.23 +#include <linux/module.h> 20.24 +#include <linux/types.h> 20.25 +#include <linux/kernel.h> 20.26 +#include <linux/init.h> 20.27 + 20.28 +#include <linux/version.h> 20.29 + 20.30 +#include <linux/skbuff.h> 20.31 +#include <linux/net.h> 20.32 +#include <linux/netdevice.h> 20.33 +#include <linux/in.h> 20.34 +#include <linux/inet.h> 20.35 +#include <linux/netfilter_ipv4.h> 20.36 +#include <linux/icmp.h> 20.37 + 20.38 +#include <net/ip.h> 20.39 +#include <net/protocol.h> 20.40 +#include <net/route.h> 20.41 +#include <net/checksum.h> 20.42 + 20.43 +#include <etherip.h> 20.44 +#include <tunnel.h> 20.45 +#include <vnet.h> 20.46 +#include <varp.h> 20.47 +#include <if_varp.h> 20.48 +#include <skb_util.h> 20.49 + 20.50 +#define MODULE_NAME "VNET" 20.51 +//#define DEBUG 1 20.52 +#undef DEBUG 20.53 +#include "debug.h" 20.54 + 20.55 +/** @file Etherip implementation. 
20.56 + * The etherip protocol is used to transport Ethernet frames in IP packets. 20.57 + */ 20.58 + 20.59 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 20.60 +#define MAC_ETH(_skb) ((struct ethhdr *)(_skb)->mac.raw) 20.61 +#else 20.62 +#define MAC_ETH(_skb) ((_skb)->mac.ethernet) 20.63 +#endif 20.64 + 20.65 +/** Get the vnet label from an etherip header. 20.66 + * 20.67 + * @param hdr header 20.68 + * @return vnet (in host order) 20.69 + */ 20.70 +int etheriphdr_get_vnet(struct etheriphdr *hdr){ 20.71 +#ifdef CONFIG_ETHERIP_EXT 20.72 + return ntohl(hdr->vnet); 20.73 +#else 20.74 + return hdr->reserved; 20.75 +#endif 20.76 +} 20.77 + 20.78 +/** Set the vnet label in an etherip header. 20.79 + * Also sets the etherip version. 20.80 + * 20.81 + * @param hdr header 20.82 + * @param vnet vnet label (in host order) 20.83 + */ 20.84 +void etheriphdr_set_vnet(struct etheriphdr *hdr, int vnet){ 20.85 +#ifdef CONFIG_ETHERIP_EXT 20.86 + hdr->version = 4; 20.87 + hdr->vnet = htonl(vnet); 20.88 +#else 20.89 + hdr->version = 3; 20.90 + hdr->reserved = vnet & 0x0fff; 20.91 +#endif 20.92 +} 20.93 + 20.94 +/** Open an etherip tunnel. 20.95 + * 20.96 + * @param tunnel to open 20.97 + * @return 0 on success, error code otherwise 20.98 + */ 20.99 +static int etherip_tunnel_open(Tunnel *tunnel){ 20.100 + return 0; 20.101 +} 20.102 + 20.103 +/** Close an etherip tunnel. 20.104 + * 20.105 + * @param tunnel to close 20.106 + */ 20.107 +static void etherip_tunnel_close(Tunnel *tunnel){ 20.108 +} 20.109 + 20.110 + 20.111 +/** Send a packet via an etherip tunnel. 20.112 + * Adds etherip header, new ip header, new ethernet header around 20.113 + * ethernet frame. 
20.114 + * 20.115 + * @param tunnel tunnel 20.116 + * @param skb packet 20.117 + * @return 0 on success, error code otherwise 20.118 + */ 20.119 +static int etherip_tunnel_send(Tunnel *tunnel, struct sk_buff *skb){ 20.120 + int err = 0; 20.121 + const int etherip_n = sizeof(struct etheriphdr); 20.122 + const int ip_n = sizeof(struct iphdr); 20.123 + const int eth_n = ETH_HLEN; 20.124 + int head_n = 0; 20.125 + int vnet = tunnel->key.vnet; 20.126 + struct etheriphdr *etheriph; 20.127 + struct ethhdr *ethh; 20.128 + u32 saddr = 0; 20.129 + 20.130 + dprintf("> skb=%p vnet=%d\n", skb, vnet); 20.131 + head_n = etherip_n + ip_n + eth_n; 20.132 + err = skb_make_room(&skb, skb, head_n, 0); 20.133 + if(err) goto exit; 20.134 + 20.135 + //err = vnet_get_device_address(skb->dev, &saddr); 20.136 + //if(err) goto exit; 20.137 + 20.138 + // The original ethernet header. 20.139 + ethh = MAC_ETH(skb); 20.140 + //print_skb_data(__FUNCTION__, 0, skb, skb->mac.raw, skb->len); 20.141 + // Null the pointer as we are pushing a new IP header. 20.142 + skb->mac.raw = NULL; 20.143 + 20.144 + // Setup the etherip header. 20.145 + //dprintf("> push etherip header...\n"); 20.146 + etheriph = (struct etheriphdr *)skb_push(skb, etherip_n); 20.147 + etheriphdr_set_vnet(etheriph, vnet); 20.148 + 20.149 + // Setup the IP header. 20.150 + //dprintf("> push IP header...\n"); 20.151 + skb->nh.raw = skb_push(skb, ip_n); 20.152 + skb->nh.iph->version = 4; // Standard version. 20.153 + skb->nh.iph->ihl = ip_n / 4; // IP header length (32-bit words). 20.154 + skb->nh.iph->tos = 0; // No special type-of-service. 20.155 + skb->nh.iph->tot_len = htons(skb->len); // Total packet length (bytes). 20.156 + skb->nh.iph->id = 0; // No flow id (since no frags). 20.157 + skb->nh.iph->frag_off = htons(IP_DF); // Don't fragment - can't handle frags. 20.158 + skb->nh.iph->ttl = 64; // Linux default time-to-live. 20.159 + skb->nh.iph->protocol = IPPROTO_ETHERIP; // IP protocol number. 
20.160 + skb->nh.iph->saddr = saddr; // Source address. 20.161 + skb->nh.iph->daddr = tunnel->key.addr; // Destination address. 20.162 + skb->nh.iph->check = 0; 20.163 + 20.164 + // Ethernet header will be filled-in by device. 20.165 + err = Tunnel_send(tunnel->base, skb); 20.166 + skb = NULL; 20.167 + exit: 20.168 + if(err && skb) dev_kfree_skb(skb); 20.169 + //dprintf("< err=%d\n", err); 20.170 + return err; 20.171 +} 20.172 + 20.173 +/** Tunnel type for etherip. 20.174 + */ 20.175 +static TunnelType _etherip_tunnel_type = { 20.176 + .name = "ETHERIP", 20.177 + .open = etherip_tunnel_open, 20.178 + .close = etherip_tunnel_close, 20.179 + .send = etherip_tunnel_send 20.180 +}; 20.181 + 20.182 +TunnelType *etherip_tunnel_type = &_etherip_tunnel_type; 20.183 + 20.184 +/* Defeat compiler warnings about unused functions. */ 20.185 +static void print_str(char *s, int n) __attribute__((unused)); 20.186 + 20.187 +static void print_str(char *s, int n) { 20.188 + int i; 20.189 + 20.190 + for(i=0; i<n; s++, i++){ 20.191 + if(i && i % 40 == 0) printk("\n"); 20.192 + if(('a'<= *s && *s <= 'z') || 20.193 + ('A'<= *s && *s <= 'Z') || 20.194 + ('0'<= *s && *s <= '9')){ 20.195 + printk("%c", *s); 20.196 + } else { 20.197 + printk("<%x>", (unsigned)(0xff & *s)); 20.198 + } 20.199 + } 20.200 + printk("\n"); 20.201 +} 20.202 + 20.203 +/** Do etherip receive processing. 20.204 + * Strips etherip header to extract the ethernet frame, sets 20.205 + * the vnet from the header and re-receives the frame. 
20.206 + * 20.207 + * @param skb packet 20.208 + * @return 0 on success, error code otherwise 20.209 + */ 20.210 +static int etherip_protocol_recv(struct sk_buff *skb){ 20.211 + int err = 0; 20.212 + int mine = 0; 20.213 + const int eth_n = ETH_HLEN; 20.214 + int ip_n; 20.215 + const int etherip_n = sizeof(struct etheriphdr); 20.216 + struct etheriphdr *etheriph; 20.217 + struct ethhdr *ethhdr; 20.218 + Vnet *vinfo = NULL; 20.219 + u32 vnet; 20.220 + 20.221 + ethhdr = MAC_ETH(skb); 20.222 + if(MULTICAST(skb->nh.iph->daddr) && 20.223 + (skb->nh.iph->daddr != varp_mcast_addr)){ 20.224 + // Ignore multicast packets not addressed to us. 20.225 + dprintf("> dst=%u.%u.%u.%u varp_mcast_addr=%u.%u.%u.%u\n", 20.226 + NIPQUAD(skb->nh.iph->daddr), 20.227 + NIPQUAD(varp_mcast_addr)); 20.228 + goto exit; 20.229 + } 20.230 + ip_n = (skb->nh.iph->ihl << 2); 20.231 + if(skb->data == skb->mac.raw){ 20.232 + // skb->data points at ethernet header. 20.233 + //dprintf("> len=%d\n", skb->len); 20.234 + if (!pskb_may_pull(skb, eth_n + ip_n)){ 20.235 + wprintf("> Malformed skb\n"); 20.236 + err = -EINVAL; 20.237 + goto exit; 20.238 + } 20.239 + skb_pull(skb, eth_n + ip_n); 20.240 + } 20.241 + // Assume skb->data points at etherip header. 20.242 + etheriph = (void*)skb->data; 20.243 + if(!pskb_may_pull(skb, etherip_n)){ 20.244 + wprintf("> Malformed skb\n"); 20.245 + err = -EINVAL; 20.246 + goto exit; 20.247 + } 20.248 + vnet = etheriphdr_get_vnet(etheriph); 20.249 + dprintf("> Rcvd skb=%p vnet=%d\n", skb, vnet); 20.250 + // If vnet is secure, context must include IPSEC ESP. 20.251 + err = vnet_check_context(vnet, SKB_CONTEXT(skb), &vinfo); 20.252 + Vnet_decref(vinfo); 20.253 + if(err){ 20.254 + wprintf("> Failed security check\n"); 20.255 + goto exit; 20.256 + } 20.257 + mine = 1; 20.258 + // Point at the headers in the contained ethernet frame. 20.259 + skb->mac.raw = skb_pull(skb, etherip_n); 20.260 + 20.261 + // Know source ip, vnet, vmac, so could update varp cache. 
20.262 + // But if traffic comes to us over a vnetd tunnel this points the coa 20.263 + // at the vnetd rather than the endpoint. So don't do it. 20.264 + //varp_update(htonl(vnet), MAC_ETH(skb)->h_source, skb->nh.iph->saddr); 20.265 + 20.266 + // Assuming a standard Ethernet frame. 20.267 + skb->nh.raw = skb_pull(skb, ETH_HLEN); 20.268 + 20.269 +#ifdef CONFIG_NETFILTER 20.270 +#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) 20.271 + // This stops our new pkt header being clobbered by a subsequent 20.272 + // call to nf_bridge_maybe_copy_header. Just replicate the 20.273 + // corresponding nf_bridge_save_header. 20.274 + if(skb->nf_bridge){ 20.275 + int header_size = 16; 20.276 + if(MAC_ETH(skb)->h_proto == __constant_htons(ETH_P_8021Q)) { 20.277 + header_size = 18; 20.278 + } 20.279 + memcpy(skb->nf_bridge->data, skb->data - header_size, header_size); 20.280 + } 20.281 +#endif 20.282 +#endif 20.283 + 20.284 + if(1){ 20.285 + struct ethhdr *eth = MAC_ETH(skb); 20.286 + // Devices use eth_type_trans() to set skb->pkt_type and skb->protocol. 20.287 + // Set them from contained ethhdr, or leave as received? 20.288 + // 'Ware use of hard_header_len in eth_type_trans(). 
20.289 + 20.290 + //skb->protocol = htons(ETH_P_IP); 20.291 + 20.292 + if(ntohs(eth->h_proto) >= 1536){ 20.293 + skb->protocol = eth->h_proto; 20.294 + } else { 20.295 + skb->protocol = htons(ETH_P_802_2); 20.296 + } 20.297 + 20.298 + if(mac_is_multicast(eth->h_dest)){ 20.299 + if(mac_is_broadcast(eth->h_dest)){ 20.300 + skb->pkt_type = PACKET_BROADCAST; 20.301 + } else { 20.302 + skb->pkt_type = PACKET_MULTICAST; 20.303 + } 20.304 + } else { 20.305 + skb->pkt_type = PACKET_HOST; 20.306 + } 20.307 + 20.308 + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); 20.309 + if (skb->ip_summed == CHECKSUM_HW){ 20.310 + skb->ip_summed = CHECKSUM_NONE; 20.311 + //skb->csum = csum_sub(skb->csum, 20.312 + // csum_partial(skb->mac.raw, skb->nh.raw - skb->mac.raw, 0)); 20.313 + } 20.314 + dst_release(skb->dst); 20.315 + skb->dst = NULL; 20.316 +#ifdef CONFIG_NETFILTER 20.317 + nf_conntrack_put(skb->nfct); 20.318 + skb->nfct = NULL; 20.319 +#ifdef CONFIG_NETFILTER_DEBUG 20.320 + skb->nf_debug = 0; 20.321 +#endif 20.322 +#endif 20.323 + } 20.324 + 20.325 + //print_skb_data(__FUNCTION__, 0, skb, skb->mac.raw, skb->len + ETH_HLEN); 20.326 + 20.327 + err = vnet_skb_recv(skb, vnet, (Vmac*)MAC_ETH(skb)->h_dest); 20.328 + exit: 20.329 + if(mine) err = 1; 20.330 + dprintf("< skb=%p err=%d\n", skb, err); 20.331 + return err; 20.332 +} 20.333 + 20.334 +/** Handle an ICMP error related to etherip. 
20.335 + * 20.336 + * @param skb ICMP error packet 20.337 + * @param info 20.338 + */ 20.339 +static void etherip_protocol_icmp_err(struct sk_buff *skb, u32 info){ 20.340 + struct iphdr *iph = (struct iphdr*)skb->data; 20.341 + 20.342 + wprintf("> ICMP error type=%d code=%d addr=" IPFMT "\n", 20.343 + skb->h.icmph->type, skb->h.icmph->code, NIPQUAD(iph->daddr)); 20.344 + 20.345 + if (skb->h.icmph->type != ICMP_DEST_UNREACH || 20.346 + skb->h.icmph->code != ICMP_FRAG_NEEDED){ 20.347 + return; 20.348 + } 20.349 + wprintf("> MTU too big addr= " IPFMT "\n", NIPQUAD(iph->daddr)); 20.350 +} 20.351 + 20.352 +//============================================================================ 20.353 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 20.354 +// Code for 2.6 kernel. 20.355 + 20.356 +/** Etherip protocol. */ 20.357 +static struct net_protocol etherip_protocol = { 20.358 + .handler = etherip_protocol_recv, 20.359 + .err_handler = etherip_protocol_icmp_err, 20.360 +}; 20.361 + 20.362 +static int etherip_protocol_add(void){ 20.363 + return inet_add_protocol(ðerip_protocol, IPPROTO_ETHERIP); 20.364 +} 20.365 + 20.366 +static int etherip_protocol_del(void){ 20.367 + return inet_del_protocol(ðerip_protocol, IPPROTO_ETHERIP); 20.368 +} 20.369 + 20.370 +//============================================================================ 20.371 +#else 20.372 +//============================================================================ 20.373 +// Code for 2.4 kernel. 20.374 + 20.375 +/** Etherip protocol. 
*/ 20.376 +static struct inet_protocol etherip_protocol = { 20.377 + .name = "ETHERIP", 20.378 + .protocol = IPPROTO_ETHERIP, 20.379 + .handler = etherip_protocol_recv, 20.380 + .err_handler = etherip_protocol_icmp_err, 20.381 +}; 20.382 + 20.383 +static int etherip_protocol_add(void){ 20.384 + inet_add_protocol(ðerip_protocol); 20.385 + return 0; 20.386 +} 20.387 + 20.388 +static int etherip_protocol_del(void){ 20.389 + return inet_del_protocol(ðerip_protocol); 20.390 +} 20.391 + 20.392 +#endif 20.393 +//============================================================================ 20.394 + 20.395 + 20.396 +/** Initialize the etherip module. 20.397 + * Registers the etherip protocol. 20.398 + * 20.399 + * @return 0 on success, error code otherwise 20.400 + */ 20.401 +int __init etherip_module_init(void) { 20.402 + int err = 0; 20.403 + etherip_protocol_add(); 20.404 + return err; 20.405 +} 20.406 + 20.407 +/** Finalize the etherip module. 20.408 + * Deregisters the etherip protocol. 20.409 + */ 20.410 +void __exit etherip_module_exit(void) { 20.411 + if(etherip_protocol_del() < 0){ 20.412 + printk(KERN_INFO "%s: can't remove etherip protocol\n", __FUNCTION__); 20.413 + } 20.414 +}
21.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 21.2 +++ b/tools/vnet/vnet-module/etherip.h Mon Nov 22 16:49:15 2004 +0000 21.3 @@ -0,0 +1,27 @@ 21.4 +/* 21.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 21.6 + * 21.7 + * This program is free software; you can redistribute it and/or modify 21.8 + * it under the terms of the GNU General Public License as published by the 21.9 + * Free Software Foundation; either version 2 of the License, or (at your 21.10 + * option) any later version. 21.11 + * 21.12 + * This program is distributed in the hope that it will be useful, but 21.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 21.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 21.15 + * for more details. 21.16 + * 21.17 + * You should have received a copy of the GNU General Public License along 21.18 + * with this program; if not, write to the Free software Foundation, Inc., 21.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 21.20 + * 21.21 + */ 21.22 +#ifndef _VNET_ETHERIP_H_ 21.23 +#define _VNET_ETHERIP_H_ 21.24 + 21.25 +#include "if_etherip.h" 21.26 + 21.27 +extern int etherip_module_init(void); 21.28 +extern void etherip_module_exit(void); 21.29 + 21.30 +#endif
/* 22.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 22.2 +++ b/tools/vnet/vnet-module/if_etherip.h Mon Nov 22 16:49:15 2004 +0000 22.3 @@ -0,0 +1,51 @@ */
/*
 * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
 *
 */
#ifndef _VNET_IF_ETHERIP_H_
#define _VNET_IF_ETHERIP_H_

/* Make the header self-contained: __u8/__u16/__u32 come from
 * <linux/types.h>, the __*_ENDIAN_BITFIELD macros from <asm/byteorder.h>.
 */
#include <linux/types.h>
#include <asm/byteorder.h>

/*----------------------------------------------------------------------------*/
#ifdef CONFIG_ETHERIP_EXT
/** Extended etherip header: one version byte followed by a 32-bit vnet. */
struct etheriphdr {
    __u8 version;
    __u32 vnet;
} __attribute__ ((packed));

/*----------------------------------------------------------------------------*/
#else
/** Etherip header: a 4-bit version and a 12-bit reserved field sharing one
 * 16-bit unit; the field order depends on the bitfield endianness.
 */
struct etheriphdr
{
#if defined(__LITTLE_ENDIAN_BITFIELD)
    __u16 reserved:12,
          version:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
    __u16 version:4,
          reserved:12;
#else
#error "Please fix <asm/byteorder.h>"
#endif

};
#endif

/* IP protocol number used for etherip, unless already defined. */
#ifndef IPPROTO_ETHERIP
#define IPPROTO_ETHERIP 97
#endif

/*----------------------------------------------------------------------------*/

#endif /* ! _VNET_IF_ETHERIP_H_ */
23.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 23.2 +++ b/tools/vnet/vnet-module/if_varp.h Mon Nov 22 16:49:15 2004 +0000 23.3 @@ -0,0 +1,53 @@ 23.4 +/* 23.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 23.6 + * 23.7 + * This program is free software; you can redistribute it and/or modify 23.8 + * it under the terms of the GNU General Public License as published by the 23.9 + * Free Software Foundation; either version 2 of the License, or (at your 23.10 + * option) any later version. 23.11 + * 23.12 + * This program is distributed in the hope that it will be useful, but 23.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 23.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 23.15 + * for more details. 23.16 + * 23.17 + * You should have received a copy of the GNU General Public License along 23.18 + * with this program; if not, write to the Free software Foundation, Inc., 23.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 23.20 + * 23.21 + */ 23.22 + 23.23 +#ifndef _VNET_IF_VARP_H 23.24 +#define _VNET_IF_VARP_H 23.25 + 23.26 +typedef struct Vmac { 23.27 + unsigned char mac[ETH_ALEN]; 23.28 +} Vmac; 23.29 + 23.30 +enum { 23.31 + VARP_ID = 1, 23.32 + VARP_OP_REQUEST = 1, 23.33 + VARP_OP_ANNOUNCE = 2, 23.34 +}; 23.35 + 23.36 +typedef struct VnetMsgHdr { 23.37 + uint16_t id; 23.38 + uint16_t opcode; 23.39 +} __attribute__((packed)) VnetMsgHdr; 23.40 + 23.41 +typedef struct VarpHdr { 23.42 + VnetMsgHdr; 23.43 + uint32_t vnet; 23.44 + Vmac vmac; 23.45 + uint32_t addr; 23.46 +} __attribute__((packed)) VarpHdr; 23.47 + 23.48 +/** Default address for varp/vnet broadcasts: 224.10.0.1 */ 23.49 +#define VARP_MCAST_ADDR 0xe00a0001 23.50 + 23.51 +/** UDP port to use for varp protocol. */ 23.52 +#define VARP_PORT 1798 23.53 + 23.54 + 23.55 + 23.56 +#endif /* ! _VNET_IF_VARP_H */
/* 24.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 24.2 +++ b/tools/vnet/vnet-module/linux/pfkeyv2.h Mon Nov 22 16:49:15 2004 +0000 24.3 @@ -0,0 +1,329 @@ */
/* PF_KEY user interface, this is defined by rfc2367 so
 * do not make arbitrary modifications or else this header
 * file will not be compliant.
 */

#ifndef _LINUX_PFKEY2_H
#define _LINUX_PFKEY2_H

#include <linux/types.h>

#define PF_KEY_V2        2
#define PFKEYV2_REVISION 199806L

/* All structures below are packed; the size comments give the resulting
 * sizeof in bytes.
 */

struct sadb_msg {
    uint8_t  sadb_msg_version;
    uint8_t  sadb_msg_type;
    uint8_t  sadb_msg_errno;
    uint8_t  sadb_msg_satype;
    uint16_t sadb_msg_len;
    uint16_t sadb_msg_reserved;
    uint32_t sadb_msg_seq;
    uint32_t sadb_msg_pid;
} __attribute__((packed));
/* sizeof(struct sadb_msg) == 16 */

struct sadb_ext {
    uint16_t sadb_ext_len;
    uint16_t sadb_ext_type;
} __attribute__((packed));
/* sizeof(struct sadb_ext) == 4 */

struct sadb_sa {
    uint16_t sadb_sa_len;
    uint16_t sadb_sa_exttype;
    uint32_t sadb_sa_spi;
    uint8_t  sadb_sa_replay;
    uint8_t  sadb_sa_state;
    uint8_t  sadb_sa_auth;
    uint8_t  sadb_sa_encrypt;
    uint32_t sadb_sa_flags;
} __attribute__((packed));
/* sizeof(struct sadb_sa) == 16 */

struct sadb_lifetime {
    uint16_t sadb_lifetime_len;
    uint16_t sadb_lifetime_exttype;
    uint32_t sadb_lifetime_allocations;
    uint64_t sadb_lifetime_bytes;
    uint64_t sadb_lifetime_addtime;
    uint64_t sadb_lifetime_usetime;
} __attribute__((packed));
/* sizeof(struct sadb_lifetime) == 32 */

struct sadb_address {
    uint16_t sadb_address_len;
    uint16_t sadb_address_exttype;
    uint8_t  sadb_address_proto;
    uint8_t  sadb_address_prefixlen;
    uint16_t sadb_address_reserved;
} __attribute__((packed));
/* sizeof(struct sadb_address) == 8 */

struct sadb_key {
    uint16_t sadb_key_len;
    uint16_t sadb_key_exttype;
    uint16_t sadb_key_bits;
    uint16_t sadb_key_reserved;
} __attribute__((packed));
/* sizeof(struct sadb_key) == 8 */

struct sadb_ident {
    uint16_t sadb_ident_len;
    uint16_t sadb_ident_exttype;
    uint16_t sadb_ident_type;
    uint16_t sadb_ident_reserved;
    uint64_t sadb_ident_id;
} __attribute__((packed));
/* sizeof(struct sadb_ident) == 16 */

struct sadb_sens {
    uint16_t sadb_sens_len;
    uint16_t sadb_sens_exttype;
    uint32_t sadb_sens_dpd;
    uint8_t  sadb_sens_sens_level;
    uint8_t  sadb_sens_sens_len;
    uint8_t  sadb_sens_integ_level;
    uint8_t  sadb_sens_integ_len;
    uint32_t sadb_sens_reserved;
} __attribute__((packed));
/* sizeof(struct sadb_sens) == 16 */

/* followed by:
    uint64_t sadb_sens_bitmap[sens_len];
    uint64_t sadb_integ_bitmap[integ_len]; */

struct sadb_prop {
    uint16_t sadb_prop_len;
    uint16_t sadb_prop_exttype;
    uint8_t  sadb_prop_replay;
    uint8_t  sadb_prop_reserved[3];
} __attribute__((packed));
/* sizeof(struct sadb_prop) == 8 */

/* followed by:
    struct sadb_comb sadb_combs[(sadb_prop_len +
        sizeof(uint64_t) - sizeof(struct sadb_prop)) /
        sizeof(struct sadb_comb)]; */

struct sadb_comb {
    uint8_t  sadb_comb_auth;
    uint8_t  sadb_comb_encrypt;
    uint16_t sadb_comb_flags;
    uint16_t sadb_comb_auth_minbits;
    uint16_t sadb_comb_auth_maxbits;
    uint16_t sadb_comb_encrypt_minbits;
    uint16_t sadb_comb_encrypt_maxbits;
    uint32_t sadb_comb_reserved;
    uint32_t sadb_comb_soft_allocations;
    uint32_t sadb_comb_hard_allocations;
    uint64_t sadb_comb_soft_bytes;
    uint64_t sadb_comb_hard_bytes;
    uint64_t sadb_comb_soft_addtime;
    uint64_t sadb_comb_hard_addtime;
    uint64_t sadb_comb_soft_usetime;
    uint64_t sadb_comb_hard_usetime;
} __attribute__((packed));
/* sizeof(struct sadb_comb) == 72 */

struct sadb_supported {
    uint16_t sadb_supported_len;
    uint16_t sadb_supported_exttype;
    uint32_t sadb_supported_reserved;
} __attribute__((packed));
/* sizeof(struct sadb_supported) == 8 */

/* followed by:
    struct sadb_alg sadb_algs[(sadb_supported_len +
        sizeof(uint64_t) - sizeof(struct sadb_supported)) /
        sizeof(struct sadb_alg)]; */

struct sadb_alg {
    uint8_t  sadb_alg_id;
    uint8_t  sadb_alg_ivlen;
    uint16_t sadb_alg_minbits;
    uint16_t sadb_alg_maxbits;
    uint16_t sadb_alg_reserved;
} __attribute__((packed));
/* sizeof(struct sadb_alg) == 8 */

struct sadb_spirange {
    uint16_t sadb_spirange_len;
    uint16_t sadb_spirange_exttype;
    uint32_t sadb_spirange_min;
    uint32_t sadb_spirange_max;
    uint32_t sadb_spirange_reserved;
} __attribute__((packed));
/* sizeof(struct sadb_spirange) == 16 */

struct sadb_x_kmprivate {
    uint16_t sadb_x_kmprivate_len;
    uint16_t sadb_x_kmprivate_exttype;
    /* Spelled uint32_t like every other 32-bit field in this header
     * (was u_int32_t); same width, same layout. */
    uint32_t sadb_x_kmprivate_reserved;
} __attribute__((packed));
/* sizeof(struct sadb_x_kmprivate) == 8 */

struct sadb_x_sa2 {
    uint16_t sadb_x_sa2_len;
    uint16_t sadb_x_sa2_exttype;
    uint8_t  sadb_x_sa2_mode;
    uint8_t  sadb_x_sa2_reserved1;
    uint16_t sadb_x_sa2_reserved2;
    uint32_t sadb_x_sa2_sequence;
    uint32_t sadb_x_sa2_reqid;
} __attribute__((packed));
/* sizeof(struct sadb_x_sa2) == 16 */

struct sadb_x_policy {
    uint16_t sadb_x_policy_len;
    uint16_t sadb_x_policy_exttype;
    uint16_t sadb_x_policy_type;
    uint8_t  sadb_x_policy_dir;
    uint8_t  sadb_x_policy_reserved;
    uint32_t sadb_x_policy_id;
    uint32_t sadb_x_policy_reserved2;
} __attribute__((packed));
/* sizeof(struct sadb_x_policy) == 16 */

struct sadb_x_ipsecrequest {
    uint16_t sadb_x_ipsecrequest_len;
    uint16_t sadb_x_ipsecrequest_proto;
    uint8_t  sadb_x_ipsecrequest_mode;
    uint8_t  sadb_x_ipsecrequest_level;
    uint16_t sadb_x_ipsecrequest_reqid;
} __attribute__((packed));
/* sizeof(struct sadb_x_ipsecrequest) == 8
 * (the fields as declared here: 2 + 2 + 1 + 1 + 2 bytes) */

/* This defines the TYPE of Nat Traversal in use. Currently only one
 * type of NAT-T is supported, draft-ietf-ipsec-udp-encaps-06
 */
struct sadb_x_nat_t_type {
    uint16_t sadb_x_nat_t_type_len;
    uint16_t sadb_x_nat_t_type_exttype;
    uint8_t  sadb_x_nat_t_type_type;
    uint8_t  sadb_x_nat_t_type_reserved[3];
} __attribute__((packed));
/* sizeof(struct sadb_x_nat_t_type) == 8 */

/* Pass a NAT Traversal port (Source or Dest port) */
struct sadb_x_nat_t_port {
    uint16_t sadb_x_nat_t_port_len;
    uint16_t sadb_x_nat_t_port_exttype;
    uint16_t sadb_x_nat_t_port_port;
    uint16_t sadb_x_nat_t_port_reserved;
} __attribute__((packed));
/* sizeof(struct sadb_x_nat_t_port) == 8 */

/* Message types */
#define SADB_RESERVED             0
#define SADB_GETSPI               1
#define SADB_UPDATE               2
#define SADB_ADD                  3
#define SADB_DELETE               4
#define SADB_GET                  5
#define SADB_ACQUIRE              6
#define SADB_REGISTER             7
#define SADB_EXPIRE               8
#define SADB_FLUSH                9
#define SADB_DUMP                 10
#define SADB_X_PROMISC            11
#define SADB_X_PCHANGE            12
#define SADB_X_SPDUPDATE          13
#define SADB_X_SPDADD             14
#define SADB_X_SPDDELETE          15
#define SADB_X_SPDGET             16
#define SADB_X_SPDACQUIRE         17
#define SADB_X_SPDDUMP            18
#define SADB_X_SPDFLUSH           19
#define SADB_X_SPDSETIDX          20
#define SADB_X_SPDEXPIRE          21
#define SADB_X_SPDDELETE2         22
#define SADB_X_NAT_T_NEW_MAPPING  23
#define SADB_MAX                  23

/* Security Association flags */
#define SADB_SAFLAGS_PFS 1

/* Security Association states */
#define SADB_SASTATE_LARVAL 0
#define SADB_SASTATE_MATURE 1
#define SADB_SASTATE_DYING  2
#define SADB_SASTATE_DEAD   3
#define SADB_SASTATE_MAX    3

/* Security Association types */
#define SADB_SATYPE_UNSPEC   0
#define SADB_SATYPE_AH       2
#define SADB_SATYPE_ESP      3
#define SADB_SATYPE_RSVP     5
#define SADB_SATYPE_OSPFV2   6
#define SADB_SATYPE_RIPV2    7
#define SADB_SATYPE_MIP      8
#define SADB_X_SATYPE_IPCOMP 9
#define SADB_SATYPE_MAX      9

/* Authentication algorithms */
#define SADB_AALG_NONE            0
#define SADB_AALG_MD5HMAC         2
#define SADB_AALG_SHA1HMAC        3
#define SADB_X_AALG_SHA2_256HMAC  5
#define SADB_X_AALG_SHA2_384HMAC  6
#define SADB_X_AALG_SHA2_512HMAC  7
#define SADB_X_AALG_RIPEMD160HMAC 8
#define SADB_X_AALG_NULL          251 /* kame */
#define SADB_AALG_MAX             251

/* Encryption algorithms */
#define SADB_EALG_NONE          0
#define SADB_EALG_DESCBC        2
#define SADB_EALG_3DESCBC       3
#define SADB_X_EALG_CASTCBC     6
#define SADB_X_EALG_BLOWFISHCBC 7
#define SADB_EALG_NULL          11
#define SADB_X_EALG_AESCBC      12
#define SADB_EALG_MAX           12

/* Compression algorithms */
#define SADB_X_CALG_NONE    0
#define SADB_X_CALG_OUI     1
#define SADB_X_CALG_DEFLATE 2
#define SADB_X_CALG_LZS     3
#define SADB_X_CALG_LZJH    4
#define SADB_X_CALG_MAX     4

/* Extension Header values */
#define SADB_EXT_RESERVED          0
#define SADB_EXT_SA                1
#define SADB_EXT_LIFETIME_CURRENT  2
#define SADB_EXT_LIFETIME_HARD     3
#define SADB_EXT_LIFETIME_SOFT     4
#define SADB_EXT_ADDRESS_SRC       5
#define SADB_EXT_ADDRESS_DST       6
#define SADB_EXT_ADDRESS_PROXY     7
#define SADB_EXT_KEY_AUTH          8
#define SADB_EXT_KEY_ENCRYPT       9
#define SADB_EXT_IDENTITY_SRC      10
#define SADB_EXT_IDENTITY_DST      11
#define SADB_EXT_SENSITIVITY       12
#define SADB_EXT_PROPOSAL          13
#define SADB_EXT_SUPPORTED_AUTH    14
#define SADB_EXT_SUPPORTED_ENCRYPT 15
#define SADB_EXT_SPIRANGE          16
#define SADB_X_EXT_KMPRIVATE       17
#define SADB_X_EXT_POLICY          18
#define SADB_X_EXT_SA2             19
/* The next four entries are for setting up NAT Traversal */
#define SADB_X_EXT_NAT_T_TYPE      20
#define SADB_X_EXT_NAT_T_SPORT     21
#define SADB_X_EXT_NAT_T_DPORT     22
#define SADB_X_EXT_NAT_T_OA        23
#define SADB_EXT_MAX               23

/* Identity Extension values */
#define SADB_IDENTTYPE_RESERVED 0
#define SADB_IDENTTYPE_PREFIX   1
#define SADB_IDENTTYPE_FQDN     2
#define SADB_IDENTTYPE_USERFQDN 3
#define SADB_IDENTTYPE_MAX      3

#endif /* !(_LINUX_PFKEY2_H) */
25.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 25.2 +++ b/tools/vnet/vnet-module/random.c Mon Nov 22 16:49:15 2004 +0000 25.3 @@ -0,0 +1,101 @@ 25.4 +/* 25.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 25.6 + * 25.7 + * This program is free software; you can redistribute it and/or modify 25.8 + * it under the terms of the GNU General Public License as published by the 25.9 + * Free Software Foundation; either version 2 of the License, or (at your 25.10 + * option) any later version. 25.11 + * 25.12 + * This program is distributed in the hope that it will be useful, but 25.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 25.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25.15 + * for more details. 25.16 + * 25.17 + * You should have received a copy of the GNU General Public License along 25.18 + * with this program; if not, write to the Free software Foundation, Inc., 25.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 25.20 + * 25.21 + */ 25.22 +#include <linux/config.h> 25.23 +#include <linux/module.h> 25.24 +#include <linux/init.h> 25.25 +#include <linux/sched.h> 25.26 +#include <linux/random.h> 25.27 + 25.28 +#include "hash_table.h" 25.29 + 25.30 +#define MODULE_NAME "RANDOM" 25.31 +#define DEBUG 1 25.32 +#undef DEBUG 25.33 +#include "debug.h" 25.34 + 25.35 +/** @file 25.36 + * Source of randomness. 25.37 + * Current implementation is not enough. 25.38 + * Needs to be cryptographically strong. 25.39 + */ 25.40 + 25.41 +static unsigned long seed = 0; 25.42 +static unsigned long count = 0; 25.43 + 25.44 +static unsigned long stir(unsigned long *a, unsigned long b){ 25.45 + pseudo_des(a, &b); 25.46 + return b; 25.47 +} 25.48 + 25.49 +/** Get one random byte. 25.50 + * 25.51 + * @return random byte 25.52 + */ 25.53 +int get_random_byte(void){ 25.54 + return stir(&seed, ++count); 25.55 +} 25.56 + 25.57 +#if 0 25.58 +/* Get some random bytes. 
25.59 + * 25.60 + * @param dst destination for the bytes 25.61 + * @param dst_n number of bytes to get 25.62 + */ 25.63 +void get_random_bytes(void *dst, int dst_n){ 25.64 + int i; 25.65 + char *p = (char *)dst; 25.66 + for(i = 0; i < dst_n; i++){ 25.67 + *p++ = get_random_byte(); 25.68 + } 25.69 +} 25.70 +#endif 25.71 + 25.72 +/** Contribute a random byte. 25.73 + * 25.74 + * @param b byte to contribute 25.75 + */ 25.76 +void add_random_byte(int b){ 25.77 + stir(&seed, ++count); 25.78 + stir(&seed, b); 25.79 +} 25.80 + 25.81 +/** Contribute some random bytes. 25.82 + * 25.83 + * @param src bytes to contribute 25.84 + * @param src_n number of bytes 25.85 + */ 25.86 +void add_random_bytes(const void *src, int src_n){ 25.87 + int i; 25.88 + char *p = (char *)src; 25.89 + for(i = 0; i < src_n; i++){ 25.90 + add_random_byte(*p++); 25.91 + } 25.92 +} 25.93 + 25.94 +int __init random_module_init(void){ 25.95 + int dummy; 25.96 + int tmp = jiffies; 25.97 + seed = (unsigned long)&dummy; 25.98 + add_random_byte(tmp); 25.99 + return 0; 25.100 +} 25.101 + 25.102 +void __exit random_module_exit(void){ 25.103 +} 25.104 +
/* New file in this changeset: tools/vnet/vnet-module/random.h */
/*
 * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free software Foundation, Inc.,
 * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
 *
 */
#ifndef __VNET_RANDOM_H__
#define __VNET_RANDOM_H__

/** Get one random value (documented in random.c as a random byte). */
int get_random_byte(void);

/** Fill a buffer with random bytes.
 * NOTE(review): the definition in random.c is compiled out with #if 0,
 * so this presumably resolves to the kernel's own get_random_bytes() - confirm.
 *
 * @param dst destination for the bytes
 * @param dst_n number of bytes to get
 */
void get_random_bytes(void *dst, int dst_n);

/** Contribute one byte to the generator state. */
void add_random_byte(int b);

/** Contribute src_n bytes starting at src to the generator state. */
void add_random_bytes(const void *src, int src_n);

/** Module initialization and finalization hooks. */
int random_module_init(void);
void random_module_exit(void);

#endif /* ! __VNET_RANDOM_H__ */
27.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 27.2 +++ b/tools/vnet/vnet-module/sa.c Mon Nov 22 16:49:15 2004 +0000 27.3 @@ -0,0 +1,670 @@ 27.4 +/* 27.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 27.6 + * 27.7 + * This program is free software; you can redistribute it and/or modify 27.8 + * it under the terms of the GNU General Public License as published by the 27.9 + * Free Software Foundation; either version 2 of the License, or (at your 27.10 + * option) any later version. 27.11 + * 27.12 + * This program is distributed in the hope that it will be useful, but 27.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 27.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27.15 + * for more details. 27.16 + * 27.17 + * You should have received a copy of the GNU General Public License along 27.18 + * with this program; if not, write to the Free software Foundation, Inc., 27.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 27.20 + * 27.21 + */ 27.22 +#include <linux/config.h> 27.23 +#include <linux/kernel.h> 27.24 + 27.25 +#include <net/ip.h> 27.26 +#include <net/protocol.h> 27.27 +#include <net/route.h> 27.28 +#include <linux/skbuff.h> 27.29 + 27.30 +#include <linux/in.h> 27.31 +#include <linux/inet.h> 27.32 +#include <linux/netdevice.h> 27.33 + 27.34 +#include <sa.h> 27.35 +#include <sa_algorithm.h> 27.36 +#include "hash_table.h" 27.37 +#include "allocate.h" 27.38 + 27.39 +#define MODULE_NAME "IPSEC" 27.40 +#define DEBUG 1 27.41 +#undef DEBUG 27.42 +#include "debug.h" 27.43 + 27.44 +/** @file IPSEC Security Association (SA). 27.45 + */ 27.46 + 27.47 +/** Maximum number of protocols.*/ 27.48 +#define INET_PROTOCOL_MAX 256 27.49 + 27.50 +/** Table of SA types indexed by protocol. */ 27.51 +static SAType *sa_type[INET_PROTOCOL_MAX] = {}; 27.52 + 27.53 +/** Hash a protocol number. 
27.54 + * 27.55 + * @param protocol protocol number 27.56 + * @return hashcode 27.57 + */ 27.58 +static inline unsigned char InetProtocol_hash(int protocol){ 27.59 + return (protocol) & (INET_PROTOCOL_MAX - 1); 27.60 +} 27.61 + 27.62 +/** Register an SA type. 27.63 + * It is an error if an SA type is already registered for the protocol. 27.64 + * 27.65 + * @param type SA type 27.66 + * @return 0 on success, error code otherwise 27.67 + */ 27.68 +int SAType_add(SAType *type){ 27.69 + int err = -EINVAL; 27.70 + int hash; 27.71 + if(!type) goto exit; 27.72 + hash = InetProtocol_hash(type->protocol); 27.73 + if(sa_type[hash]) goto exit; 27.74 + err = 0; 27.75 + sa_type[hash] = type; 27.76 + exit: 27.77 + return err; 27.78 +} 27.79 + 27.80 +/** Deregister an SA type. 27.81 + * It is an error if no SA type is registered for the protocol. 27.82 + * 27.83 + * @param type SA type 27.84 + * @return 0 on success, error code otherwise 27.85 + */ 27.86 +int SAType_del(SAType *type){ 27.87 + int err = -EINVAL; 27.88 + int hash; 27.89 + if(!type) goto exit; 27.90 + hash = InetProtocol_hash(type->protocol); 27.91 + if(!sa_type[hash]) goto exit; 27.92 + err = 0; 27.93 + sa_type[hash] = NULL; 27.94 + exit: 27.95 + return err; 27.96 +} 27.97 + 27.98 +int SAType_get(int protocol, SAType **type){ 27.99 + int err = -ENOENT; 27.100 + int hash; 27.101 + hash = InetProtocol_hash(protocol); 27.102 + *type = sa_type[hash]; 27.103 + if(!*type) goto exit; 27.104 + err = 0; 27.105 + exit: 27.106 + return err; 27.107 +} 27.108 + 27.109 +/* Defeat compiler warnings about unused functions. */ 27.110 +static int sa_key_check(SAKey *key, enum sa_alg_type type) __attribute__((unused)); 27.111 +static u32 random_spi(void) __attribute__((unused)); 27.112 +static u32 generate_key(u32 key, u32 offset, u32 spi) __attribute__((unused)); 27.113 + 27.114 +/** Check a key has an acceptable length for an algorithm. 
27.115 + * 27.116 + * @param key key 27.117 + * @param type algorithm 27.118 + * @return 0 on success, error code otherwise 27.119 + */ 27.120 +static int sa_key_check(SAKey *key, enum sa_alg_type type){ 27.121 + return 0; 27.122 +} 27.123 + 27.124 +static unsigned long sa_spi_counter = 0; 27.125 + 27.126 +/** Generate a random spi. 27.127 + * Uses a hashed counter. 27.128 + * 27.129 + * @return spi 27.130 + */ 27.131 +static u32 random_spi(void){ 27.132 + unsigned long left, right = 0; 27.133 + u32 spi; 27.134 + do{ 27.135 + left = sa_spi_counter++; 27.136 + pseudo_des(&left, &right); 27.137 + spi = right; 27.138 + } while(!spi); 27.139 + return spi; 27.140 +} 27.141 + 27.142 +/** Mangle some input to generate output. 27.143 + * This is used to derive spis and keying material from secrets, 27.144 + * so it probably ought to be cryptographically strong. 27.145 + * Probably ought to use a good hash (sha1) or cipher (aes). 27.146 + * 27.147 + * @param input input values 27.148 + * @param n number of values 27.149 + * @return mangled value 27.150 + */ 27.151 +static u32 mangle(u32 input[], int n){ 27.152 + unsigned long left = 0, right = 0; 27.153 + int i; 27.154 + for(i=0; i<n; i++){ 27.155 + left ^= input[i]; 27.156 + pseudo_des(&left, &right); 27.157 + } 27.158 + return (u32)right; 27.159 +} 27.160 + 27.161 +/** Generate a spi for a given protocol and address, using a secret key. 27.162 + * The offset is used when it is necessary to generate more than one spi 27.163 + * for the same protocol and address. 
27.164 + * 27.165 + * @param key key 27.166 + * @param offset offset 27.167 + * @param protocol protocol 27.168 + * @param addr IP address 27.169 + * @return spi 27.170 + */ 27.171 +static u32 generate_spi(u32 key, u32 offset, u32 protocol, u32 addr){ 27.172 + u32 input[] = { key, offset, protocol, addr }; 27.173 + u32 spi; 27.174 + dprintf(">\n"); 27.175 + spi = mangle(input, 4); 27.176 + dprintf("< spi=%x\n", spi); 27.177 + return spi; 27.178 +} 27.179 + 27.180 +/** Generate keying material for a given spi, based on a 27.181 + * secret. 27.182 + * 27.183 + * @param key secret 27.184 + * @param offset offset 27.185 + * @param spi spi 27.186 + * @return keying material 27.187 + */ 27.188 +static u32 generate_key(u32 key, u32 offset, u32 spi){ 27.189 + u32 input[] = { key, offset, spi }; 27.190 + return mangle(input, 3); 27.191 +} 27.192 + 27.193 +/** Allocate a spi. 27.194 + * Want to use random ones. 27.195 + * So check for ones not in use. 27.196 + * 27.197 + * When using static keying, both ends need to agree on key. 27.198 + * How does that work? Also, will suddenly get traffic using a spi, 27.199 + * and will have to create SA then. Or need to create in advance. 27.200 + * But can't do that because don't know peers. 27.201 + * When get message on a spi that doesn't exist - do what? 27.202 + * Use a spi related to the destination addr and a secret. 27.203 + * Then receiver can check if spi is ok and create SA on demand. 27.204 + * Use hash of key, protocol, addr to generate. Then have to check 27.205 + * for in-use because of potential collisions. Receiver can do the 27.206 + * same hash and check spi is in usable range. Then derive keys from 27.207 + * the spi (using another secret). 
27.208 + * 27.209 + * @param key spi generation key 27.210 + * @param protocol protocol 27.211 + * @param addr IP address 27.212 + * @param spip return parameter for spi 27.213 + * @return 0 on success, error code otherwise 27.214 + */ 27.215 +int sa_spi_alloc(u32 key, u32 protocol, u32 addr, u32 *spip){ 27.216 + int err = 0; 27.217 + int i = 0, n = 100; 27.218 + u32 spi; 27.219 + for(i = 0; i < n; i++, spi++){ 27.220 + spi = generate_spi(key, i, protocol, addr); 27.221 + if(!spi) continue; 27.222 + if(!sa_table_lookup_spi(spi, protocol, addr)){ 27.223 + *spip = spi; 27.224 + goto exit; 27.225 + } 27.226 + } 27.227 + err = -ENOMEM; 27.228 + exit: 27.229 + return err; 27.230 +} 27.231 + 27.232 +/** Table of SAs. Indexed by unique id and spi/protocol/addr triple. 27.233 + */ 27.234 +static HashTable *sa_table = NULL; 27.235 + 27.236 +static u32 sa_id = 1; 27.237 + 27.238 +/** Hash an SA id. 27.239 + * 27.240 + * @param id SA id 27.241 + * @return hashcode 27.242 + */ 27.243 +static inline Hashcode sa_table_hash_id(u32 id){ 27.244 + return hash_ul(id); 27.245 +} 27.246 + 27.247 +/** Hash SA spi/protocol/addr. 27.248 + * 27.249 + * @param spi spi 27.250 + * @param protocol protocol 27.251 + * @param addr IP address 27.252 + * @return hashcode 27.253 + */ 27.254 +static inline Hashcode sa_table_hash_spi(u32 spi, u32 protocol, u32 addr){ 27.255 + Hashcode h = 0; 27.256 + h = hash_2ul(spi, protocol); 27.257 + h = hash_hul(h, addr); 27.258 + return h; 27.259 +} 27.260 + 27.261 +/** Test if an SA entry has a given value. 27.262 + * 27.263 + * @param arg contains SA pointer 27.264 + * @param table hashtable 27.265 + * @param entry entry containing SA 27.266 + * @return 1 if it does, 0 otherwise 27.267 + */ 27.268 +static int sa_table_state_fn(TableArg arg, HashTable *table, HTEntry *entry){ 27.269 + return entry->value == arg.ptr; 27.270 +} 27.271 + 27.272 +/** Test if an SA entry has a given id. 
27.273 + * 27.274 + * @param arg contains SA id 27.275 + * @param table hashtable 27.276 + * @param entry entry containing SA 27.277 + * @return 1 if it does, 0 otherwise 27.278 + */ 27.279 +static int sa_table_id_fn(TableArg arg, HashTable *table, HTEntry *entry){ 27.280 + SAState *state = entry->value; 27.281 + u32 id = arg.ul; 27.282 + return state->ident.id == id; 27.283 +} 27.284 + 27.285 +/** Test if an SA entry has a given spi/protocol/addr. 27.286 + * 27.287 + * @param arg contains SAIdent pointer 27.288 + * @param table hashtable 27.289 + * @param entry entry containing SA 27.290 + * @return 1 if it does, 0 otherwise 27.291 + */ 27.292 +static int sa_table_spi_fn(TableArg arg, HashTable *table, HTEntry *entry){ 27.293 + SAState *state = entry->value; 27.294 + SAIdent *ident = arg.ptr; 27.295 + return state->ident.spi == ident->spi 27.296 + && state->ident.protocol == ident->protocol 27.297 + && state->ident.addr == ident->addr; 27.298 +} 27.299 + 27.300 +/** Free an SA entry. Decrements the SA refcount and frees the entry. 27.301 + * 27.302 + * @param table containing table 27.303 + * @param entry to free 27.304 + */ 27.305 +void sa_table_free_fn(HashTable *table, HTEntry *entry){ 27.306 + if(!entry) return; 27.307 + if(entry->value){ 27.308 + SAState *state = entry->value; 27.309 + SAState_decref(state); 27.310 + } 27.311 + deallocate(entry); 27.312 +} 27.313 + 27.314 +/** Initialize the SA table. 27.315 + * 27.316 + * @return 0 on success, error code otherwise 27.317 + */ 27.318 +int sa_table_init(void){ 27.319 + int err = 0; 27.320 + sa_table = HashTable_new(0); 27.321 + if(!sa_table){ 27.322 + err = -ENOMEM; 27.323 + goto exit; 27.324 + } 27.325 + sa_table->entry_free_fn = sa_table_free_fn; 27.326 + 27.327 + exit: 27.328 + return err; 27.329 +} 27.330 + 27.331 +void sa_table_exit(void){ 27.332 + HashTable_free(sa_table); 27.333 +} 27.334 + 27.335 +/** Remove an SA from the table. 
27.336 + * 27.337 + * @param state SA 27.338 + */ 27.339 +int sa_table_delete(SAState *state){ 27.340 + int count = 0; 27.341 + Hashcode h1, h2; 27.342 + TableArg arg = { .ptr = state }; 27.343 + // Remove by id. 27.344 + h1 = sa_table_hash_id(state->ident.id); 27.345 + count += HashTable_remove_entry(sa_table, h1, sa_table_state_fn, arg); 27.346 + // Remove by spi/protocol/addr if spi nonzero. 27.347 + if(!state->ident.spi) goto exit; 27.348 + h2 = sa_table_hash_spi(state->ident.spi, state->ident.protocol, state->ident.addr); 27.349 + if(h1 == h2) goto exit; 27.350 + count += HashTable_remove_entry(sa_table, h2, sa_table_state_fn, arg); 27.351 + exit: 27.352 + return count; 27.353 +} 27.354 + 27.355 +/** Add an SA to the table. 27.356 + * The SA is indexed by id and spi/protocol/addr (if the spi is non-zero). 27.357 + * 27.358 + * @param state SA 27.359 + * @return 0 on success, error code otherwise 27.360 + */ 27.361 +int sa_table_add(SAState *state){ 27.362 + int err = 0; 27.363 + Hashcode h1, h2; 27.364 + int entries = 0; 27.365 + 27.366 + dprintf(">\n"); 27.367 + // Index by id. 27.368 + h1 = sa_table_hash_id(state->ident.id); 27.369 + if(!HashTable_add_entry(sa_table, h1, HKEY(state->ident.id), state)){ 27.370 + err = -ENOMEM; 27.371 + goto exit; 27.372 + } 27.373 + entries++; 27.374 + SAState_incref(state); 27.375 + // Index by spi/protocol/addr if spi non-zero. 27.376 + if(state->ident.spi){ 27.377 + h2 = sa_table_hash_spi(state->ident.spi, state->ident.protocol, state->ident.addr); 27.378 + if(h1 != h2){ 27.379 + if(!HashTable_add_entry(sa_table, h2, HKEY(state->ident.id), state)){ 27.380 + err = -ENOMEM; 27.381 + goto exit; 27.382 + } 27.383 + entries++; 27.384 + SAState_incref(state); 27.385 + } 27.386 + } 27.387 + exit: 27.388 + if(err && entries){ 27.389 + sa_table_delete(state); 27.390 + } 27.391 + dprintf("< err=%d\n", err); 27.392 + return err; 27.393 +} 27.394 + 27.395 + 27.396 +/** Find an SA by spi/protocol/addr. 
27.397 + * Increments the SA refcount on success. 27.398 + * 27.399 + * @param spi spi 27.400 + * @param protocol protocol 27.401 + * @param addr IP address 27.402 + * @return SA or NULL 27.403 + */ 27.404 +SAState * sa_table_lookup_spi(u32 spi, u32 protocol, u32 addr){ 27.405 + SAState *state = NULL; 27.406 + Hashcode h; 27.407 + SAIdent id = { 27.408 + .spi = spi, 27.409 + .protocol = protocol, 27.410 + .addr = addr }; 27.411 + TableArg arg = { .ptr = &id }; 27.412 + HTEntry *entry = NULL; 27.413 + 27.414 + h = sa_table_hash_spi(spi, protocol, addr); 27.415 + entry = HashTable_find_entry(sa_table, h, sa_table_spi_fn, arg); 27.416 + if(entry){ 27.417 + state = entry->value; 27.418 + SAState_incref(state); 27.419 + } 27.420 + return state; 27.421 +} 27.422 + 27.423 +/** Find an SA by unique id. 27.424 + * Increments the SA refcount on success. 27.425 + * 27.426 + * @param id id 27.427 + * @return SA or NULL 27.428 + */ 27.429 +SAState * sa_table_lookup_id(u32 id){ 27.430 + Hashcode h; 27.431 + TableArg arg = { .ul = id }; 27.432 + HTEntry *entry = NULL; 27.433 + SAState *state = NULL; 27.434 + 27.435 + dprintf("> id=%u\n", id); 27.436 + h = sa_table_hash_id(id); 27.437 + entry = HashTable_find_entry(sa_table, h, sa_table_id_fn, arg); 27.438 + if(entry){ 27.439 + state = entry->value; 27.440 + SAState_incref(state); 27.441 + } 27.442 + dprintf("< state=%p\n", state); 27.443 + return state; 27.444 +} 27.445 + 27.446 +/** Replace an existing SA by another in the table. 27.447 + * The existing SA is not removed if the new one cannot be added. 27.448 + * 27.449 + * @param existing SA to replace 27.450 + * @param state new SA 27.451 + * @return 0 on success, error code otherwise 27.452 + */ 27.453 +static int sa_table_replace(SAState *existing, SAState *state){ 27.454 + int err = 0; 27.455 + // Need check for in-use? 
27.456 + 27.457 + dprintf(">\n"); 27.458 + if(existing->keying.state != SA_STATE_ACQUIRE){ 27.459 + err = -EINVAL; 27.460 + goto exit; 27.461 + } 27.462 + // replace it. 27.463 + err = sa_table_add(state); 27.464 + if(err) goto exit; 27.465 + sa_table_delete(existing); 27.466 + exit: 27.467 + dprintf("< err=%d\n", err); 27.468 + return err; 27.469 +} 27.470 + 27.471 +/** Allocate an SA. 27.472 + * 27.473 + * @return SA or NULL 27.474 + */ 27.475 +SAState *SAState_alloc(void){ 27.476 + SAState *state; 27.477 + 27.478 + dprintf(">\n"); 27.479 + state = kmalloc(sizeof(SAState), GFP_ATOMIC); 27.480 + if(!state) goto exit; 27.481 + *state = (SAState){}; 27.482 + atomic_set(&state->refcount, 1); 27.483 + state->lock = SPIN_LOCK_UNLOCKED; 27.484 + exit: 27.485 + dprintf("< state=%p\n", state); 27.486 + return state; 27.487 +} 27.488 + 27.489 +/** Create an SA in initial state. 27.490 + * It has no spi and its keying state is acquire. 27.491 + * It must have a unique id, protocol and address. 27.492 + * At some point it should get updated with a complete SA. 27.493 + * 27.494 + * @param ident SA identifier 27.495 + * @param statep return parameter for new SA 27.496 + * @return 0 on success, error code otherwise 27.497 + */ 27.498 +int SAState_init(SAIdent *ident, SAState **statep){ 27.499 + int err = 0; 27.500 + SAState *state = NULL; 27.501 + 27.502 + if(ident->spi || !ident->id){ 27.503 + err = -EINVAL; 27.504 + goto exit; 27.505 + } 27.506 + state = SAState_alloc(); 27.507 + if (!state){ 27.508 + err = -ENOMEM; 27.509 + goto exit; 27.510 + } 27.511 + state->ident = *ident; 27.512 + state->keying.state = SA_STATE_ACQUIRE; 27.513 + exit: 27.514 + return err; 27.515 +} 27.516 + 27.517 +/** Create a complete SA, with spi and cipher suite. 
27.518 + * 27.519 + * @param info SA parameters 27.520 + * @param statep return parameter for new SA 27.521 + * @return 0 on success, error code otherwise 27.522 + */ 27.523 +int SAState_create(SAInfo *info, SAState **statep){ 27.524 + int err = 0; 27.525 + SAState *state = NULL; 27.526 + 27.527 + dprintf(">\n"); 27.528 + state = SAState_alloc(); 27.529 + if (!state){ 27.530 + err = -ENOMEM; 27.531 + goto exit; 27.532 + } 27.533 + state->ident = info->ident; 27.534 + state->limits = info->limits; 27.535 + state->digest = info->digest; 27.536 + state->cipher = info->cipher; 27.537 + state->compress = info->compress; 27.538 + state->security = info->security; 27.539 + err = SAType_get(state->ident.protocol, &state->type); 27.540 + if (err) goto exit; 27.541 + err = state->type->init(state, NULL); 27.542 + if (err) goto exit; 27.543 + state->keying.state = SA_STATE_VALID; 27.544 + exit: 27.545 + if(err){ 27.546 + SAState_decref(state); 27.547 + state = NULL; 27.548 + } 27.549 + *statep = state; 27.550 + dprintf("< err=%d\n", err); 27.551 + return err; 27.552 +} 27.553 + 27.554 +/** Create an SA for the given spi etc. 27.555 + * For now we fix the cipher suite and the keys. 27.556 + * Digest is SHA1 HMAC with a 128-bit key. 27.557 + * Cipher is AES (Rijndael) in CBC mode with a 128-bit key. 27.558 + * 27.559 + * The cipher suite and keys should really come from policy, with the 27.560 + * possibility of negotiating them with the peer (using IKE). 27.561 + * Negotiation creates difficulties though - because the SA cannot 27.562 + * be created immediately we have to be able to queue packets 27.563 + * while the SA is being negotiated. 
27.564 + * 27.565 + * @param spi spi 27.566 + * @param protocol protocol 27.567 + * @param addr address 27.568 + * @param sa return parameter for SA 27.569 + * @return 0 on success, error code otherwise 27.570 + */ 27.571 +int sa_create(int security, u32 spi, u32 protocol, u32 addr, SAState **sa){ 27.572 + int err = 0; 27.573 + SAInfo info = {}; 27.574 + char *digest_name = "sha1"; 27.575 + char *digest_key = "0123456789abcdef"; 27.576 + int digest_key_n = strlen(digest_key); 27.577 + char *cipher_name= "aes"; 27.578 + char *cipher_key = "0123456789ABCDEF"; 27.579 + int cipher_key_n = strlen(cipher_key); 27.580 + 27.581 + dprintf("> security=%d spi=%u protocol=%u addr=" IPFMT "\n", 27.582 + security, spi, protocol, NIPQUAD(addr)); 27.583 + if(!spi){ 27.584 + spi = generate_spi(0, 0, protocol, addr); 27.585 + } 27.586 + dprintf("> info...\n"); 27.587 + info.ident.id = sa_id++; 27.588 + info.ident.spi = spi; 27.589 + info.ident.protocol = protocol; 27.590 + info.ident.addr = addr; 27.591 + info.security = security; 27.592 + 27.593 + //sa_algorithm_probe_all(); 27.594 + 27.595 + dprintf("> digest name=%s key_n=%d\n", digest_name, digest_key_n); 27.596 + strcpy(info.digest.name, digest_name); 27.597 + info.digest.bits = digest_key_n * 8; 27.598 + memcpy(info.digest.key, digest_key, digest_key_n); 27.599 + 27.600 + if(security & SA_CONF){ 27.601 + dprintf("> cipher name=%s key_n=%d\n", cipher_name, cipher_key_n); 27.602 + strcpy(info.cipher.name, cipher_name); 27.603 + info.cipher.bits = cipher_key_n * 8; 27.604 + memcpy(info.cipher.key, cipher_key, cipher_key_n); 27.605 + } else { 27.606 + dprintf("> cipher name=%s key_n=%d\n", "cipher_null", 0); 27.607 + strcpy(info.cipher.name, "cipher_null"); 27.608 + info.cipher.bits = 0; 27.609 + memset(info.cipher.key, 0, sizeof(info.cipher.key)); 27.610 + } 27.611 + 27.612 + err = sa_set(&info, 0, sa); 27.613 + dprintf("< err=%d\n", err); 27.614 + return err; 27.615 +} 27.616 + 27.617 +/** Create or update an SA. 
27.618 + * The SA is added to the table. 27.619 + * 27.620 + * @param info SA parameters 27.621 + * @param update create if zero, update otherwise 27.622 + * @return 0 on success, error code otherwise 27.623 + */ 27.624 +int sa_set(SAInfo *info, int update, SAState **val){ 27.625 + int err = 0; 27.626 + SAState *state = NULL; 27.627 + SAState *existing = NULL; 27.628 + 27.629 + dprintf("> info=%p update=%d val=%p\n", info, update, val); 27.630 + existing = sa_table_lookup_id(info->ident.id); 27.631 + if(update && !existing){ 27.632 + err = -ENOENT; 27.633 + } else if(!update && existing){ 27.634 + err = -EINVAL; 27.635 + } 27.636 + if(err) goto exit; 27.637 + err = SAState_create(info, &state); 27.638 + if (err) goto exit; 27.639 + if(existing){ 27.640 + err = sa_table_replace(existing, state); 27.641 + } else { 27.642 + err = sa_table_add(state); 27.643 + } 27.644 + exit: 27.645 + if(existing) SAState_decref(existing); 27.646 + if(val && !err){ 27.647 + *val = state; 27.648 + } else { 27.649 + SAState_decref(state); 27.650 + } 27.651 + dprintf("< err=%d\n", err); 27.652 + return err; 27.653 +} 27.654 + 27.655 +/** Delete an SA. Removes it from the SA table. 27.656 + * It is an error if no SA with the given id exists. 27.657 + * 27.658 + * @param id SA id 27.659 + * @return 0 on success, error code otherwise 27.660 + */ 27.661 +int sa_delete(int id){ 27.662 + int err = 0; 27.663 + SAState *state; 27.664 + state = sa_table_lookup_id(id); 27.665 + if (!state){ 27.666 + err = -ENOENT; 27.667 + goto exit; 27.668 + } 27.669 + sa_table_delete(state); 27.670 + SAState_decref(state); 27.671 + exit: 27.672 + return err; 27.673 +}
28.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 28.2 +++ b/tools/vnet/vnet-module/sa.h Mon Nov 22 16:49:15 2004 +0000 28.3 @@ -0,0 +1,199 @@ 28.4 +/* 28.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 28.6 + * 28.7 + * This program is free software; you can redistribute it and/or modify 28.8 + * it under the terms of the GNU General Public License as published by the 28.9 + * Free Software Foundation; either version 2 of the License, or (at your 28.10 + * option) any later version. 28.11 + * 28.12 + * This program is distributed in the hope that it will be useful, but 28.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 28.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 28.15 + * for more details. 28.16 + * 28.17 + * You should have received a copy of the GNU General Public License along 28.18 + * with this program; if not, write to the Free software Foundation, Inc., 28.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 28.20 + * 28.21 + */ 28.22 +#ifndef __VNET_SA_H__ 28.23 +#define __VNET_SA_H__ 28.24 + 28.25 +#include <linux/types.h> 28.26 +#include <linux/crypto.h> 28.27 + 28.28 +#include <tunnel.h> 28.29 + 28.30 +#ifndef CRYPTO_MAX_KEY_BYTES 28.31 +#define CRYPTO_MAX_KEY_BYTES 64 28.32 +#define CRYPTO_MAX_KEY_BITS (CRYPTO_MAX_KEY_BYTES * 8) 28.33 +#endif 28.34 + 28.35 +typedef struct SALimits { 28.36 + u64 bytes_soft; 28.37 + u64 bytes_hard; 28.38 + u64 packets_soft; 28.39 + u64 packets_hard; 28.40 +} SALimits; 28.41 + 28.42 +typedef struct SACounts { 28.43 + u64 bytes; 28.44 + u64 packets; 28.45 + u32 integrity_failures; 28.46 +} SACounts; 28.47 + 28.48 +typedef struct SAReplay { 28.49 + int replay; 28.50 + u32 send_seq; 28.51 + u32 recv_seq; 28.52 + u32 bitmap; 28.53 + u32 replay_window; 28.54 +} SAReplay; 28.55 + 28.56 +typedef struct SAKey { 28.57 + char name[CRYPTO_MAX_ALG_NAME]; 28.58 + int bits; 28.59 + char key[CRYPTO_MAX_KEY_BYTES]; 28.60 +} SAKey; 28.61 + 28.62 +typedef 
struct SAKeying { 28.63 + u8 state; 28.64 + u8 dying; 28.65 +} SAKeying; 28.66 + 28.67 +typedef struct SAIdent { 28.68 + u32 id; 28.69 + u32 spi; 28.70 + u32 addr; 28.71 + u32 protocol; 28.72 +} SAIdent; 28.73 + 28.74 +struct SAType; 28.75 + 28.76 +/** Security assocation (SA). */ 28.77 +typedef struct SAState { 28.78 + atomic_t refcount; 28.79 + spinlock_t lock; 28.80 + /** Identifier. */ 28.81 + struct SAIdent ident; 28.82 + /** Security flags. */ 28.83 + int security; 28.84 + /** Keying state. */ 28.85 + struct SAKeying keying; 28.86 + /** Byte counts etc. */ 28.87 + struct SACounts counts; 28.88 + /** Byte limits etc. */ 28.89 + struct SALimits limits; 28.90 + /** Replay protection. */ 28.91 + struct SAReplay replay; 28.92 + /** Digest algorithm. */ 28.93 + struct SAKey digest; 28.94 + /** Cipher algorithm. */ 28.95 + struct SAKey cipher; 28.96 + /** Compress algorith. */ 28.97 + struct SAKey compress; 28.98 + /** SA type (ESP, AH). */ 28.99 + struct SAType *type; 28.100 + /** Data for the SA type to use. */ 28.101 + void *data; 28.102 +} SAState; 28.103 + 28.104 +typedef struct SAType { 28.105 + char *name; 28.106 + int protocol; 28.107 + int (*init)(SAState *state, void *args); 28.108 + void (*fini)(SAState *state); 28.109 + int (*recv)(SAState *state, struct sk_buff *skb); 28.110 + int (*send)(SAState *state, struct sk_buff *skb, Tunnel *tunnel); 28.111 + u32 (*size)(SAState *state, int size); 28.112 +} SAType; 28.113 + 28.114 +/** Information needed to create an SA. 28.115 + * Unused algorithms have zero key size. 28.116 + */ 28.117 +typedef struct SAInfo { 28.118 + /** Identifier. */ 28.119 + SAIdent ident; 28.120 + /** Security flags. */ 28.121 + int security; 28.122 + /** Digest algorithm and key. */ 28.123 + SAKey digest; 28.124 + /** Cipher algorithm and key. */ 28.125 + SAKey cipher; 28.126 + /** Compress algorithm and key. */ 28.127 + SAKey compress; 28.128 + /** SA lifetime limits. */ 28.129 + SALimits limits; 28.130 + /** Replay protection window. 
*/ 28.131 + int replay_window; 28.132 +} SAInfo; 28.133 + 28.134 +enum sa_alg_type { 28.135 + SA_ALG_DIGEST = 1, 28.136 + SA_ALG_CIPHER = 2, 28.137 + SA_ALG_COMPRESS = 3, 28.138 +}; 28.139 + 28.140 +extern int SAType_add(SAType *type); 28.141 +extern int SAType_del(SAType *type); 28.142 +extern int SAType_get(int protocol, SAType **type); 28.143 + 28.144 +extern int sa_table_init(void); 28.145 +extern void sa_table_exit(void); 28.146 +extern int sa_table_delete(SAState *state); 28.147 +extern int sa_table_add(SAState *state); 28.148 +extern SAState * sa_table_lookup_spi(u32 spi, u32 protocol, u32 addr); 28.149 +extern SAState * sa_table_lookup_id(u32 id); 28.150 + 28.151 +/** Increment reference count. 28.152 + * 28.153 + * @param sa security association (may be null) 28.154 + */ 28.155 +static inline void SAState_incref(SAState *sa){ 28.156 + if(!sa) return; 28.157 + atomic_inc(&sa->refcount); 28.158 +} 28.159 + 28.160 +/** Decrement reference count, freeing if zero. 28.161 + * 28.162 + * @param sa security association (may be null) 28.163 + */ 28.164 +static inline void SAState_decref(SAState *sa){ 28.165 + if(!sa) return; 28.166 + if(atomic_dec_and_test(&sa->refcount)){ 28.167 + sa->type->fini(sa); 28.168 + kfree(sa); 28.169 + } 28.170 +} 28.171 + 28.172 +extern SAState *SAState_alloc(void); 28.173 +extern int SAState_init(SAIdent *id, SAState **statep); 28.174 +extern int SAState_create(SAInfo *info, SAState **statep); 28.175 + 28.176 +static inline int SAState_send(SAState *sa, struct sk_buff *skb, Tunnel *tunnel){ 28.177 + return sa->type->send(sa, skb, tunnel); 28.178 +} 28.179 + 28.180 +static inline int SAState_recv(SAState *sa, struct sk_buff *skb){ 28.181 + return sa->type->recv(sa, skb); 28.182 +} 28.183 + 28.184 +static inline int SAState_size(SAState *sa, int n){ 28.185 + return sa->type->size(sa, n); 28.186 +} 28.187 + 28.188 +extern int sa_create(int security, u32 spi, u32 protocol, u32 addr, SAState **sa); 28.189 +extern int sa_set(SAInfo *info, 
int update, SAState **val); 28.190 +extern int sa_delete(int id); 28.191 + 28.192 +enum { 28.193 + SA_AUTH = 1, 28.194 + SA_CONF = 2 28.195 +}; 28.196 + 28.197 +enum { 28.198 + SA_STATE_ACQUIRE = 1, 28.199 + SA_STATE_VALID = 2, 28.200 +}; 28.201 + 28.202 +#endif /* !__VNET_SA_H__ */
29.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 29.2 +++ b/tools/vnet/vnet-module/sa_algorithm.c Mon Nov 22 16:49:15 2004 +0000 29.3 @@ -0,0 +1,367 @@ 29.4 +/* 29.5 + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> 29.6 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 29.7 + * 29.8 + * This program is free software; you can redistribute it and/or modify 29.9 + * it under the terms of the GNU General Public License as published by the 29.10 + * Free Software Foundation; either version 2 of the License, or (at your 29.11 + * option) any later version. 29.12 + * 29.13 + * This program is distributed in the hope that it will be useful, but 29.14 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 29.15 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 29.16 + * for more details. 29.17 + * 29.18 + * You should have received a copy of the GNU General Public License along 29.19 + * with this program; if not, write to the Free software Foundation, Inc., 29.20 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 29.21 + * 29.22 + */ 29.23 +#include <linux/config.h> 29.24 +#include <linux/kernel.h> 29.25 +#include <linux/string.h> 29.26 +#include <linux/crypto.h> 29.27 +#include <linux/sched.h> 29.28 +//#include <asm/softirq.h> 29.29 + 29.30 +#include <sa_algorithm.h> 29.31 + 29.32 +#define MODULE_NAME "IPSEC" 29.33 +#define DEBUG 1 29.34 +#undef DEBUG 29.35 +#include "debug.h" 29.36 + 29.37 +/** @file Tables of supported IPSEC algorithms. 29.38 + * Has tables for digests, ciphers and compression algorithms. 29.39 + */ 29.40 + 29.41 +/* 29.42 + * Algorithms supported by IPsec. These entries contain properties which 29.43 + * are used in key negotiation and sa processing, and are used to verify 29.44 + * that instantiated crypto transforms have correct parameters for IPsec 29.45 + * purposes. 29.46 + */ 29.47 + 29.48 +/** Digests. 
*/ 29.49 +static SAAlgorithm digest_alg[] = { 29.50 + { 29.51 + .name = "digest_null", 29.52 + .info = { 29.53 + .digest = { 29.54 + .icv_truncbits = 0, 29.55 + .icv_fullbits = 0, 29.56 + } 29.57 + }, 29.58 + .alg = { 29.59 + .sadb_alg_id = SADB_X_AALG_NULL, 29.60 + .sadb_alg_ivlen = 0, 29.61 + .sadb_alg_minbits = 0, 29.62 + .sadb_alg_maxbits = 0 29.63 + } 29.64 + }, 29.65 + { 29.66 + .name = "md5", 29.67 + .info = { .digest = { 29.68 + .icv_truncbits = 96, 29.69 + .icv_fullbits = 128, 29.70 + } }, 29.71 + .alg = { 29.72 + .sadb_alg_id = SADB_AALG_MD5HMAC, 29.73 + .sadb_alg_ivlen = 0, 29.74 + .sadb_alg_minbits = 128, 29.75 + .sadb_alg_maxbits = 128 29.76 + } 29.77 + }, 29.78 + { 29.79 + .name = "sha1", 29.80 + .info = { 29.81 + .digest = { 29.82 + .icv_truncbits = 96, 29.83 + .icv_fullbits = 160, 29.84 + } 29.85 + }, 29.86 + .alg = { 29.87 + .sadb_alg_id = SADB_AALG_SHA1HMAC, 29.88 + .sadb_alg_ivlen = 0, 29.89 + .sadb_alg_minbits = 160, 29.90 + .sadb_alg_maxbits = 160 29.91 + } 29.92 + }, 29.93 + { 29.94 + .name = "sha256", 29.95 + .info = { 29.96 + .digest = { 29.97 + .icv_truncbits = 128, 29.98 + .icv_fullbits = 256, 29.99 + } 29.100 + }, 29.101 + .alg = { 29.102 + .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC, 29.103 + .sadb_alg_ivlen = 0, 29.104 + .sadb_alg_minbits = 256, 29.105 + .sadb_alg_maxbits = 256 29.106 + } 29.107 + }, 29.108 +/* { */ 29.109 +/* .name = "ripemd160", */ 29.110 +/* .info = { */ 29.111 +/* .digest = { */ 29.112 +/* .icv_truncbits = 96, */ 29.113 +/* .icv_fullbits = 160, */ 29.114 +/* } */ 29.115 +/* }, */ 29.116 +/* .alg = { */ 29.117 +/* .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, */ 29.118 +/* .sadb_alg_ivlen = 0, */ 29.119 +/* .sadb_alg_minbits = 160, */ 29.120 +/* .sadb_alg_maxbits = 160 */ 29.121 +/* } */ 29.122 +/* }, */ 29.123 + { /* Terminator */ } 29.124 +}; 29.125 + 29.126 +/** Ciphers. 
*/ 29.127 +static SAAlgorithm cipher_alg[] = { 29.128 + { 29.129 + .name = "cipher_null", 29.130 + .info = { 29.131 + .cipher = { 29.132 + .blockbits = 8, 29.133 + .defkeybits = 0, 29.134 + } 29.135 + }, 29.136 + .alg = { 29.137 + .sadb_alg_id = SADB_EALG_NULL, 29.138 + .sadb_alg_ivlen = 0, 29.139 + .sadb_alg_minbits = 0, 29.140 + .sadb_alg_maxbits = 0 29.141 + } 29.142 + }, 29.143 + { 29.144 + .name = "des", 29.145 + .info = { 29.146 + .cipher = { 29.147 + .blockbits = 64, 29.148 + .defkeybits = 64, 29.149 + } 29.150 + }, 29.151 + .alg = { 29.152 + .sadb_alg_id = SADB_EALG_DESCBC, 29.153 + .sadb_alg_ivlen = 8, 29.154 + .sadb_alg_minbits = 64, 29.155 + .sadb_alg_maxbits = 64 29.156 + } 29.157 + }, 29.158 + { 29.159 + .name = "des3_ede", 29.160 + .info = { 29.161 + .cipher = { 29.162 + .blockbits = 64, 29.163 + .defkeybits = 192, 29.164 + } 29.165 + }, 29.166 + .alg = { 29.167 + .sadb_alg_id = SADB_EALG_3DESCBC, 29.168 + .sadb_alg_ivlen = 8, 29.169 + .sadb_alg_minbits = 192, 29.170 + .sadb_alg_maxbits = 192 29.171 + } 29.172 + }, 29.173 +/* { */ 29.174 +/* .name = "cast128", */ //cast5? 
29.175 +/* .info = { */ 29.176 +/* .cipher = { */ 29.177 +/* .blockbits = 64, */ 29.178 +/* .defkeybits = 128, */ 29.179 +/* } */ 29.180 +/* }, */ 29.181 +/* .alg = { */ 29.182 +/* .sadb_alg_id = SADB_X_EALG_CASTCBC, */ 29.183 +/* .sadb_alg_ivlen = 8, */ 29.184 +/* .sadb_alg_minbits = 40, */ 29.185 +/* .sadb_alg_maxbits = 128 */ 29.186 +/* } */ 29.187 +/* }, */ 29.188 + { 29.189 + .name = "blowfish", 29.190 + .info = { 29.191 + .cipher = { 29.192 + .blockbits = 64, 29.193 + .defkeybits = 128, 29.194 + } 29.195 + }, 29.196 + .alg = { 29.197 + .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC, 29.198 + .sadb_alg_ivlen = 8, 29.199 + .sadb_alg_minbits = 40, 29.200 + .sadb_alg_maxbits = 448 29.201 + } 29.202 + }, 29.203 + { 29.204 + .name = "aes", 29.205 + .info = { 29.206 + .cipher = { 29.207 + .blockbits = 128, 29.208 + .defkeybits = 128, 29.209 + } 29.210 + }, 29.211 + .alg = { 29.212 + .sadb_alg_id = SADB_X_EALG_AESCBC, 29.213 + .sadb_alg_ivlen = 8, 29.214 + .sadb_alg_minbits = 128, 29.215 + .sadb_alg_maxbits = 256 29.216 + } 29.217 + }, 29.218 + { /* Terminator */ } 29.219 +}; 29.220 + 29.221 +/** Compressors. 
*/ 29.222 +static SAAlgorithm compress_alg[] = { 29.223 + { 29.224 + .name = "deflate", 29.225 + .info = { 29.226 + .compress = { 29.227 + .threshold = 90, 29.228 + } 29.229 + }, 29.230 + .alg = { .sadb_alg_id = SADB_X_CALG_DEFLATE } 29.231 + }, 29.232 +/* { */ 29.233 +/* .name = "lzs", */ 29.234 +/* .info = { */ 29.235 +/* .compress = { */ 29.236 +/* .threshold = 90, */ 29.237 +/* } */ 29.238 +/* }, */ 29.239 +/* .alg = { .sadb_alg_id = SADB_X_CALG_LZS } */ 29.240 +/* }, */ 29.241 +/* { */ 29.242 +/* .name = "lzjh", */ 29.243 +/* .info = { */ 29.244 +/* .compress = { */ 29.245 +/* .threshold = 50, */ 29.246 +/* } */ 29.247 +/* }, */ 29.248 +/* .alg = { .sadb_alg_id = SADB_X_CALG_LZJH } */ 29.249 +/* }, */ 29.250 + { /* Terminator */ } 29.251 +}; 29.252 + 29.253 +static SAAlgorithm *sa_algorithm_by_id(SAAlgorithm *algo, int alg_id) { 29.254 + for( ; algo && algo->name; algo++){ 29.255 + if (algo->alg.sadb_alg_id == alg_id) { 29.256 + return (algo->available ? algo : NULL); 29.257 + } 29.258 + } 29.259 + return NULL; 29.260 +} 29.261 + 29.262 + 29.263 +static SAAlgorithm *sa_algorithm_by_name(SAAlgorithm *algo, char *name) { 29.264 + if (!name) return NULL; 29.265 + for( ; algo && algo->name; algo++){ 29.266 + if (strcmp(name, algo->name) == 0) { 29.267 + return (algo->available ? 
algo : NULL); 29.268 + } 29.269 + } 29.270 + return NULL; 29.271 +} 29.272 + 29.273 +SAAlgorithm *sa_digest_by_id(int alg_id) { 29.274 + return sa_algorithm_by_id(digest_alg, alg_id); 29.275 +} 29.276 + 29.277 +SAAlgorithm *sa_cipher_by_id(int alg_id) { 29.278 + return sa_algorithm_by_id(cipher_alg, alg_id); 29.279 +} 29.280 + 29.281 +SAAlgorithm *sa_compress_by_id(int alg_id) { 29.282 + return sa_algorithm_by_id(compress_alg, alg_id); 29.283 +} 29.284 + 29.285 +SAAlgorithm *sa_digest_by_name(char *name) { 29.286 + return sa_algorithm_by_name(digest_alg, name); 29.287 +} 29.288 + 29.289 +SAAlgorithm *sa_cipher_by_name(char *name) { 29.290 + return sa_algorithm_by_name(cipher_alg, name); 29.291 +} 29.292 + 29.293 +SAAlgorithm *sa_compress_by_name(char *name) { 29.294 + return sa_algorithm_by_name(compress_alg, name); 29.295 +} 29.296 + 29.297 +SAAlgorithm *sa_digest_by_index(unsigned int idx) { 29.298 + return digest_alg + idx; 29.299 +} 29.300 + 29.301 +SAAlgorithm *sa_cipher_by_index(unsigned int idx) { 29.302 + return cipher_alg + idx; 29.303 +} 29.304 + 29.305 +SAAlgorithm *sa_compress_by_index(unsigned int idx) { 29.306 + return compress_alg + idx; 29.307 +} 29.308 + 29.309 +static void sa_algorithm_probe(SAAlgorithm *algo){ 29.310 + int status; 29.311 + dprintf("> algo=%p\n", algo); 29.312 + for( ; algo && algo->name; algo++){ 29.313 + dprintf("> algorithm %s...\n", algo->name); 29.314 + status = crypto_alg_available(algo->name, 0); 29.315 + dprintf("> algorithm %s status=%d\n",algo->name, status); 29.316 + if (algo->available != status){ 29.317 + algo->available = status; 29.318 + } 29.319 + } 29.320 + dprintf("<\n"); 29.321 +} 29.322 + 29.323 +/** Crypto api is broken. When an unregistered algorithm is requested it 29.324 + * tries to load a module of the same name. But not all algorithms are 29.325 + * defined by modules of the same name. 
29.326 + */ 29.327 +static char *crypto_modules[] = { 29.328 + "aes", 29.329 + //"arc4", 29.330 + "blowfish", 29.331 + //"cast5", 29.332 + //"cast6", 29.333 + "crypto_null", 29.334 + "des", 29.335 + //"md4", 29.336 + "md5", 29.337 + //"serpent", 29.338 + "sha1", 29.339 + "sha256", 29.340 + //"sha512", 29.341 + //"twofish", 29.342 + NULL 29.343 +}; 29.344 + 29.345 +#include <linux/kmod.h> 29.346 + 29.347 +static void sa_module_probe(char **modules){ 29.348 + char **p; 29.349 + dprintf(">\n"); 29.350 + for(p = modules; *p; p++){ 29.351 + dprintf("> %s\n", *p); 29.352 + request_module(*p); 29.353 + } 29.354 + dprintf("<\n"); 29.355 +} 29.356 + 29.357 +/** 29.358 + * Probe for the availability of crypto algorithms, and set the available 29.359 + * flag for any algorithms found on the system. This is typically called by 29.360 + * pfkey during userspace SA add, update or register. 29.361 + */ 29.362 +void sa_algorithm_probe_all(void){ 29.363 + dprintf("> \n"); 29.364 + //BUG_ON(in_softirq()); 29.365 + sa_module_probe(crypto_modules); 29.366 + sa_algorithm_probe(digest_alg); 29.367 + sa_algorithm_probe(cipher_alg); 29.368 + sa_algorithm_probe(compress_alg); 29.369 + dprintf("<\n"); 29.370 +}
30.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 30.2 +++ b/tools/vnet/vnet-module/sa_algorithm.h Mon Nov 22 16:49:15 2004 +0000 30.3 @@ -0,0 +1,63 @@ 30.4 +/* 30.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 30.6 + * 30.7 + * This program is free software; you can redistribute it and/or modify 30.8 + * it under the terms of the GNU General Public License as published by the 30.9 + * Free Software Foundation; either version 2 of the License, or (at your 30.10 + * option) any later version. 30.11 + * 30.12 + * This program is distributed in the hope that it will be useful, but 30.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 30.15 + * for more details. 30.16 + * 30.17 + * You should have received a copy of the GNU General Public License along 30.18 + * with this program; if not, write to the Free software Foundation, Inc., 30.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 30.20 + * 30.21 + */ 30.22 +#ifndef __VNET_SA_ALGORITHM_H__ 30.23 +#define __VNET_SA_ALGORITHM_H__ 30.24 + 30.25 +#include <linux/types.h> 30.26 +#include <linux/pfkeyv2.h> 30.27 + 30.28 +typedef struct SADigestInfo { 30.29 + u16 icv_truncbits; 30.30 + u16 icv_fullbits; 30.31 +} SADigestInfo; 30.32 + 30.33 +typedef struct SACipherInfo { 30.34 + u16 blockbits; 30.35 + u16 defkeybits; 30.36 +} SACipherInfo; 30.37 + 30.38 +typedef struct SACompressInfo { 30.39 + u16 threshold; 30.40 +} SACompressInfo; 30.41 + 30.42 +typedef struct SAAlgorithm { 30.43 + char *name; 30.44 + u8 available; 30.45 + union { 30.46 + SADigestInfo digest; 30.47 + SACipherInfo cipher; 30.48 + SACompressInfo compress; 30.49 + } info; 30.50 + struct sadb_alg alg; 30.51 +} SAAlgorithm; 30.52 + 30.53 +extern SAAlgorithm *sa_digest_by_id(int alg_id); 30.54 +extern SAAlgorithm *sa_cipher_by_id(int alg_id); 30.55 +extern SAAlgorithm *sa_compress_by_id(int alg_id); 30.56 +extern SAAlgorithm 
*sa_digest_by_name(char *name); 30.57 +extern SAAlgorithm *sa_cipher_by_name(char *name); 30.58 +extern SAAlgorithm *sa_compress_by_name(char *name); 30.59 +extern SAAlgorithm *sa_digest_by_index(unsigned int idx); 30.60 +extern SAAlgorithm *sa_cipher_by_index(unsigned int idx); 30.61 +extern SAAlgorithm *sa_compress_by_index(unsigned int idx); 30.62 +extern void sa_algorithm_probe_all(void); 30.63 + 30.64 +#define MAX_KEY_BITS 512 30.65 + 30.66 +#endif /* ! __VNET_SA_ALGORITHM_H__ */
31.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 31.2 +++ b/tools/vnet/vnet-module/skb_context.c Mon Nov 22 16:49:15 2004 +0000 31.3 @@ -0,0 +1,92 @@ 31.4 +/* 31.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 31.6 + * 31.7 + * This program is free software; you can redistribute it and/or modify 31.8 + * it under the terms of the GNU General Public License as published by the 31.9 + * Free Software Foundation; either version 2 of the License, or (at your 31.10 + * option) any later version. 31.11 + * 31.12 + * This program is distributed in the hope that it will be useful, but 31.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 31.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31.15 + * for more details. 31.16 + * 31.17 + * You should have received a copy of the GNU General Public License along 31.18 + * with this program; if not, write to the Free software Foundation, Inc., 31.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 31.20 + * 31.21 + */ 31.22 +#include <linux/config.h> 31.23 +#include <linux/kernel.h> 31.24 +#include <linux/skbuff.h> 31.25 +#include <linux/slab.h> 31.26 + 31.27 +#include <skb_context.h> 31.28 + 31.29 +#define MODULE_NAME "VNET" 31.30 +#define DEBUG 1 31.31 +#undef DEBUG 31.32 +#include "debug.h" 31.33 + 31.34 +SkbContext *SkbContext_create(u32 vnet, u32 addr, int protocol, void *data, 31.35 + void (*free_fn)(SkbContext *)){ 31.36 + SkbContext *context = NULL; 31.37 + 31.38 + context = kmalloc(sizeof(SkbContext), GFP_ATOMIC); 31.39 + if(!context) goto exit; 31.40 + context->vnet = vnet; 31.41 + context->addr = addr; 31.42 + context->protocol = protocol; 31.43 + context->data = data; 31.44 + context->free_fn = free_fn; 31.45 + context->next = NULL; 31.46 + atomic_set(&context ->refcount, 1); 31.47 + exit: 31.48 + return context; 31.49 +} 31.50 + 31.51 +void SkbContext_free(SkbContext *context){ 31.52 + if(!context) return; 31.53 + if(context->next) 
SkbContext_decref(context->next); 31.54 + if(context->free_fn) context->free_fn(context); 31.55 + context->vnet = 0; 31.56 + context->addr = 0; 31.57 + context->protocol = 0; 31.58 + context->free_fn = NULL; 31.59 + context->data = NULL; 31.60 + context->next = NULL; 31.61 + kfree(context); 31.62 +} 31.63 + 31.64 +int SkbContext_push(SkbContext **val, u32 vnet, u32 addr, int protocol, 31.65 + void *data, void (*free_fn)(SkbContext *)){ 31.66 + int err = 0; 31.67 + SkbContext *context = NULL; 31.68 + 31.69 + dprintf("> vnet=%u addr=%u.%u.%u.%u protocol=%d\n", 31.70 + vnet, NIPQUAD(addr), protocol); 31.71 + context = SkbContext_create(vnet, addr, protocol, data, free_fn); 31.72 + if(!context){ 31.73 + err = -ENOMEM; 31.74 + goto exit; 31.75 + } 31.76 + context->next = *val; 31.77 + *val = context; 31.78 + exit: 31.79 + dprintf("< err=%d\n", err); 31.80 + return err; 31.81 +} 31.82 + 31.83 +int skb_push_context(struct sk_buff *skb, u32 vnet, u32 addr, int protocol, 31.84 + void *data, void (*free_fn)(SkbContext *)){ 31.85 + int err = 0; 31.86 + //SkbContext *ctxt = SKB_CONTEXT(skb); 31.87 + dprintf("> skb=%p\n", skb); 31.88 + 31.89 + //err = SkbContext_push(&ctxt, vnet, addr, protocol, data, free_fn); //todo fixme 31.90 + //SKB_CONTEXT(skb) = ctxt;//todo fixme 31.91 + dprintf("< err=%d\n", err); 31.92 + return err; 31.93 +} 31.94 + 31.95 +
32.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 32.2 +++ b/tools/vnet/vnet-module/skb_context.h Mon Nov 22 16:49:15 2004 +0000 32.3 @@ -0,0 +1,76 @@ 32.4 +/* 32.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 32.6 + * 32.7 + * This program is free software; you can redistribute it and/or modify 32.8 + * it under the terms of the GNU General Public License as published by the 32.9 + * Free Software Foundation; either version 2 of the License, or (at your 32.10 + * option) any later version. 32.11 + * 32.12 + * This program is distributed in the hope that it will be useful, but 32.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 32.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 32.15 + * for more details. 32.16 + * 32.17 + * You should have received a copy of the GNU General Public License along 32.18 + * with this program; if not, write to the Free software Foundation, Inc., 32.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 32.20 + * 32.21 + */ 32.22 + 32.23 +#ifndef __VNET_SKB_CONTEXT_H__ 32.24 +#define __VNET_SKB_CONTEXT_H__ 32.25 + 32.26 +#include <linux/config.h> 32.27 +#include <linux/kernel.h> 32.28 +#include <asm/atomic.h> 32.29 +#include <linux/types.h> 32.30 + 32.31 +/** Structure used to record inbound processing path for skbs. 32.32 + * For example, the ETHERIP protocol handler can use this to 32.33 + * tell whether an inbound packet came through IPSEC ESP or not. 32.34 + */ 32.35 +typedef struct SkbContext { 32.36 + u32 vnet; 32.37 + u32 addr; 32.38 + int protocol; 32.39 + void *data; 32.40 + void (*free_fn)(struct SkbContext *); 32.41 + atomic_t refcount; 32.42 + struct SkbContext *next; 32.43 +} SkbContext; 32.44 + 32.45 +/** Decrement the reference count, freeing if zero. 
32.46 + * 32.47 + * @param context context (may be null) 32.48 + */ 32.49 +static inline void SkbContext_decref(SkbContext *context){ 32.50 + extern void SkbContext_free(SkbContext *context); 32.51 + if(!context) return; 32.52 + if(atomic_dec_and_test(&context->refcount)){ 32.53 + SkbContext_free(context); 32.54 + } 32.55 +} 32.56 + 32.57 +/** Increment the reference count. 32.58 + * 32.59 + * @param context context (may be null) 32.60 + */ 32.61 +static inline void SkbContext_incref(SkbContext *context){ 32.62 + if(!context) return; 32.63 + atomic_inc(&context->refcount); 32.64 +} 32.65 + 32.66 +extern SkbContext *SkbContext_create(u32 vnet, u32 addr, int protocol, void *data, 32.67 + void (*free_fn)(SkbContext *)); 32.68 + 32.69 +extern int SkbContext_push(SkbContext **val, u32 vnet, u32 addr, int protocol, 32.70 + void *data, void (*free_fn)(SkbContext *)); 32.71 + 32.72 +struct sk_buff; 32.73 +extern int skb_push_context(struct sk_buff *skb, u32 vnet, u32 addr, int protocol, 32.74 + void *data, void (*free_fn)(SkbContext *)); 32.75 + 32.76 +//todo: fixme 32.77 +#define SKB_CONTEXT(_skb) ((SkbContext *)(&(_skb)->cb[0])) 32.78 + 32.79 +#endif /* !__VNET_SKB_CONTEXT_H__ */
33.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 33.2 +++ b/tools/vnet/vnet-module/skb_util.c Mon Nov 22 16:49:15 2004 +0000 33.3 @@ -0,0 +1,515 @@ 33.4 +/* 33.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 33.6 + * 33.7 + * This program is free software; you can redistribute it and/or modify 33.8 + * it under the terms of the GNU General Public License as published by the 33.9 + * Free Software Foundation; either version 2 of the License, or (at your 33.10 + * option) any later version. 33.11 + * 33.12 + * This program is distributed in the hope that it will be useful, but 33.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 33.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 33.15 + * for more details. 33.16 + * 33.17 + * You should have received a copy of the GNU General Public License along 33.18 + * with this program; if not, write to the Free software Foundation, Inc., 33.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 33.20 + * 33.21 + */ 33.22 +#include <linux/config.h> 33.23 +#include <linux/module.h> 33.24 +#include <linux/kernel.h> 33.25 +#include <linux/init.h> 33.26 +#include <linux/version.h> 33.27 + 33.28 +#include <asm/scatterlist.h> 33.29 +#include <linux/crypto.h> 33.30 +#include <linux/pfkeyv2.h> 33.31 +#include <linux/random.h> 33.32 + 33.33 +#include <linux/net.h> 33.34 +#include <linux/in.h> 33.35 +#include <linux/inet.h> 33.36 +#include <linux/netdevice.h> 33.37 +#include <linux/tcp.h> 33.38 +#include <linux/udp.h> 33.39 + 33.40 +#include <net/ip.h> 33.41 +#include <net/protocol.h> 33.42 +#include <net/route.h> 33.43 +#include <linux/skbuff.h> 33.44 + 33.45 +#include <varp.h> 33.46 +#include <skb_util.h> 33.47 + 33.48 +#define MODULE_NAME "VNET" 33.49 +#define DEBUG 1 33.50 +#undef DEBUG 33.51 +#include "debug.h" 33.52 + 33.53 +static const int DEBUG_SCATTERLIST = 0; 33.54 +static const int DEBUG_SKB = 0; 33.55 + 33.56 
+//============================================================================ 33.57 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 33.58 +#define SET_SCATTER_ADDR(sg, addr) do{} while(0) 33.59 +#else 33.60 +#define SET_SCATTER_ADDR(sg, addr) (sg).address = (addr) 33.61 +#endif 33.62 + 33.63 +/** Make enough room in an skb for extra header and trailer. 33.64 + * 33.65 + * @param pskb return parameter for expanded skb 33.66 + * @param skb skb 33.67 + * @param head_n required headroom 33.68 + * @param tail_n required tailroom 33.69 + * @return 0 on success, error code otherwise 33.70 + */ 33.71 +int skb_make_room(struct sk_buff **pskb, struct sk_buff *skb, int head_n, int tail_n){ 33.72 + int err = 0; 33.73 + int has_headroom = (head_n <= skb_headroom(skb)); 33.74 + int has_tailroom = (tail_n <= skb_tailroom(skb)); 33.75 + int writeable = !skb_cloned(skb) && !skb_shared(skb); 33.76 + 33.77 + dprintf("> skb=%p headroom=%d head_n=%d tailroom=%d tail_n=%d\n", 33.78 + skb, 33.79 + skb_headroom(skb), head_n, 33.80 + skb_tailroom(skb), tail_n); 33.81 + if(writeable && has_headroom && has_tailroom){ 33.82 + // There's room! Reuse it. 33.83 + *pskb = skb; 33.84 + } else if(writeable && has_tailroom){ 33.85 + // Tailroom, no headroom. Expand header the way GRE does. 33.86 + struct sk_buff *new_skb = skb_realloc_headroom(skb, head_n + 16); 33.87 + if(!new_skb){ 33.88 + err = -ENOMEM; 33.89 + goto exit; 33.90 + } 33.91 + dev_kfree_skb(skb); 33.92 + *pskb = new_skb; 33.93 + } else { 33.94 + // No room. Expand. There may be more efficient ways to do 33.95 + // this, but this is simple and correct. 
33.96 + struct sk_buff *new_skb = skb_copy_expand(skb, head_n + 16, tail_n, GFP_ATOMIC); 33.97 + if(!new_skb){ 33.98 + err = -ENOMEM; 33.99 + goto exit; 33.100 + } 33.101 + dev_kfree_skb(skb); 33.102 + *pskb = new_skb; 33.103 + } 33.104 + dprintf("> skb=%p headroom=%d head_n=%d tailroom=%d tail_n=%d\n", 33.105 + *pskb, 33.106 + skb_headroom(*pskb), head_n, 33.107 + skb_tailroom(*pskb), tail_n); 33.108 + exit: 33.109 + dprintf("< err=%d\n", err); 33.110 + return err; 33.111 +} 33.112 + 33.113 +/** Copy some data bits from a kernel buffer to an skb. 33.114 + * Derived in the obvious way from skb_copy_bits(). 33.115 + */ 33.116 +int skb_put_bits(const struct sk_buff *skb, int offset, void *src, int len) 33.117 +{ 33.118 + int i, copy; 33.119 + int start = skb->len - skb->data_len; 33.120 + 33.121 + if (offset > (int)skb->len-len) 33.122 + goto fault; 33.123 + 33.124 + /* Copy header. */ 33.125 + if ((copy = start-offset) > 0) { 33.126 + if (copy > len) 33.127 + copy = len; 33.128 + memcpy(skb->data + offset, src, copy); 33.129 + if ((len -= copy) == 0) 33.130 + return 0; 33.131 + offset += copy; 33.132 + src += copy; 33.133 + } 33.134 + 33.135 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 33.136 + int end; 33.137 + 33.138 + BUG_TRAP(start <= offset+len); 33.139 + 33.140 + end = start + skb_shinfo(skb)->frags[i].size; 33.141 + if ((copy = end-offset) > 0) { 33.142 + u8 *vaddr; 33.143 + 33.144 + if (copy > len) 33.145 + copy = len; 33.146 + 33.147 + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); 33.148 + memcpy(vaddr + skb_shinfo(skb)->frags[i].page_offset + offset - start, 33.149 + src, 33.150 + copy); 33.151 + kunmap_skb_frag(vaddr); 33.152 + 33.153 + if ((len -= copy) == 0) 33.154 + return 0; 33.155 + offset += copy; 33.156 + src += copy; 33.157 + } 33.158 + start = end; 33.159 + } 33.160 + 33.161 + if (skb_shinfo(skb)->frag_list) { 33.162 + struct sk_buff *list; 33.163 + 33.164 + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { 33.165 + int 
end; 33.166 + 33.167 + BUG_TRAP(start <= offset+len); 33.168 + 33.169 + end = start + list->len; 33.170 + if ((copy = end-offset) > 0) { 33.171 + if (copy > len) 33.172 + copy = len; 33.173 + if (skb_put_bits(list, offset-start, src, copy)) 33.174 + goto fault; 33.175 + if ((len -= copy) == 0) 33.176 + return 0; 33.177 + offset += copy; 33.178 + src += copy; 33.179 + } 33.180 + start = end; 33.181 + } 33.182 + } 33.183 + if (len == 0) 33.184 + return 0; 33.185 + 33.186 + fault: 33.187 + return -EFAULT; 33.188 +} 33.189 + 33.190 +/** Add some space to the end of a (possibly fragmented) skb. 33.191 + * 33.192 + * Only works with Xen output skbs. Output skbs have 1 frag, and we 33.193 + * add another frag for the extra space. 33.194 + * 33.195 + * @param skb skb 33.196 + * @param n number of bytes to add 33.197 + * @return 0 on success, error code otherwise 33.198 + * 33.199 + * @todo fixme 33.200 + */ 33.201 +int pskb_put(struct sk_buff *skb, int n){ 33.202 + int err = 0; 33.203 + if(1 || skb_is_nonlinear(skb)){ 33.204 + struct skb_shared_info *info = skb_shinfo(skb); 33.205 + char *ptr = NULL; 33.206 + 33.207 + if(info->nr_frags >= MAX_SKB_FRAGS){ 33.208 + err = -ENOMEM; 33.209 + goto exit; 33.210 + } 33.211 + ptr = kmalloc(n, GFP_ATOMIC); 33.212 + if(!ptr){ 33.213 + err = -ENOMEM; 33.214 + goto exit; 33.215 + } 33.216 + info->nr_frags++; 33.217 + info->frags[info->nr_frags - 1].page = virt_to_page(ptr); 33.218 + info->frags[info->nr_frags - 1].page_offset = ((unsigned long)ptr & ~PAGE_MASK); 33.219 + info->frags[info->nr_frags - 1].size = n; 33.220 + 33.221 + skb->data_len += n; 33.222 + skb->len += n; 33.223 + } else { 33.224 + __skb_put(skb, n); 33.225 + } 33.226 + exit: 33.227 + if(err) dprintf("< err=%d\n", err); 33.228 + return err; 33.229 +} 33.230 + 33.231 +/** Print some bits of an skb. 
33.232 + * 33.233 + * @param skb to print 33.234 + * @param offset byte offset to start printing at 33.235 + * @param n number of bytes to print 33.236 + */ 33.237 +void skb_print_bits(struct sk_buff *skb, int offset, int n){ 33.238 + int chunk = 16; 33.239 + int i, k; 33.240 + u8 buff[chunk]; 33.241 + if(!DEBUG_SKB) return; 33.242 + while(n){ 33.243 + k = (n > chunk ? chunk : n); 33.244 + skb_copy_bits(skb, offset, buff, k); 33.245 + printk("%03d ", offset); 33.246 + for(i=0; i<k; i++){ 33.247 + if(i == 8)printk(" "); 33.248 + printk(":%02x", buff[i] & 0xff); 33.249 + } 33.250 + printk(" \n"); 33.251 + n -= k; 33.252 + offset += k; 33.253 + } 33.254 +} 33.255 + 33.256 +/** Print a buffer. 33.257 + * 33.258 + * @param buf to print 33.259 + * @param n number of bytes to print 33.260 + */ 33.261 +void buf_print(char *buf, int n){ 33.262 + int i; 33.263 + for(i=0; i<n; i++){ 33.264 + if( i % 16 == 0) printk("\n%04d ", i); 33.265 + else if(i % 8 == 0) printk(" "); 33.266 + printk(":%02x", buf[i] & 0xff); 33.267 + } 33.268 + printk(" %04d\n", n); 33.269 +} 33.270 + 33.271 +/** Remove some space from the tail of an skb. 33.272 + * 33.273 + * @todo fixme: Do we need to handle frags? 33.274 + */ 33.275 +void *skb_trim_tail(struct sk_buff *skb, int n){ 33.276 + skb->tail -= n; 33.277 + skb->len -= n; 33.278 + return skb->tail; 33.279 +} 33.280 + 33.281 +// #define BUG_TRAP(x) 33.282 +// if(!(x)){ printk("KERNEL: assertion (" #x ") failed at " __FILE__ "(%d)\n", __LINE__); } 33.283 + 33.284 +/** Convert a (possibly fragmented) skb into a scatter list. 
33.285 + * 33.286 + * @param skb skb to convert 33.287 + * @param sg scatterlist to set up 33.288 + * @param sg_n size of sg on input, number of elements set on output 33.289 + * @param offset offset into data to start at 33.290 + * @param len number of bytes 33.291 + * @return 0 on success, error code otherwise 33.292 + */ 33.293 +int skb_scatterlist(struct sk_buff *skb, struct scatterlist *sg, int *sg_n, 33.294 + int offset, int len){ 33.295 + int err = 0; 33.296 + int start; // No. of bytes copied so far (where next copy starts). 33.297 + int size; // Size of the next chunk. 33.298 + int end; // Where the next chunk ends (start + size). 33.299 + int copy; // Number of bytes to copy in one operation. 33.300 + int sg_i = 0; // Index into sg. 33.301 + int i; 33.302 + 33.303 + if(DEBUG_SCATTERLIST){ 33.304 + dprintf("> offset=%d len=%d (end=%d), skb len=%d,\n", 33.305 + offset, len, offset+len, skb->len); 33.306 + } 33.307 + start = 0; 33.308 + size = skb_headlen(skb); 33.309 + end = start + size; 33.310 + copy = end - offset; 33.311 + if(copy > 0){ 33.312 + char *p; 33.313 + if(copy > len) copy = len; 33.314 + if(sg_i >= *sg_n){ 33.315 + err = -EINVAL; 33.316 + goto exit; 33.317 + } 33.318 + p = skb->data + offset; 33.319 + SET_SCATTER_ADDR(sg[sg_i], NULL); 33.320 + sg[sg_i].page = virt_to_page(p); 33.321 + sg[sg_i].offset = ((unsigned long)p & ~PAGE_MASK); 33.322 + sg[sg_i].length = copy; 33.323 + if(DEBUG_SCATTERLIST){ 33.324 + dprintf("> sg_i=%d .page=%p .offset=%u .length=%d\n", 33.325 + sg_i, sg[sg_i].page, sg[sg_i].offset, sg[sg_i].length); 33.326 + } 33.327 + sg_i++; 33.328 + if((len -= copy) == 0) goto exit; 33.329 + offset += copy; 33.330 + } 33.331 + start = end; 33.332 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++){ 33.333 + BUG_TRAP(start <= offset + len); 33.334 + size = skb_shinfo(skb)->frags[i].size; 33.335 + end = start + size; 33.336 + copy = end - offset; 33.337 + if(copy > 0){ 33.338 + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 33.339 + 
if(copy > len) copy = len; 33.340 + if(sg_i >= *sg_n){ 33.341 + err = -EINVAL; 33.342 + goto exit; 33.343 + } 33.344 + SET_SCATTER_ADDR(sg[sg_i], NULL); 33.345 + sg[sg_i].page = frag->page; 33.346 + sg[sg_i].offset = frag->page_offset + offset - start; 33.347 + sg[sg_i].length = copy; 33.348 + if(DEBUG_SCATTERLIST){ 33.349 + dprintf("> sg_i=%d .page=%p .offset=%u .length=%d\n", 33.350 + sg_i, sg[sg_i].page, sg[sg_i].offset, sg[sg_i].length); 33.351 + } 33.352 + sg_i++; 33.353 + if((len -= copy) == 0) goto exit; 33.354 + offset += copy; 33.355 + } 33.356 + start = end; 33.357 + } 33.358 + exit: 33.359 + if(!err) *sg_n = sg_i; 33.360 + if(len) wprintf("> len=%d\n", len); 33.361 + if(len) BUG(); 33.362 + if(err) dprintf("< err=%d sg_n=%d\n", err, *sg_n); 33.363 + return err; 33.364 +} 33.365 + 33.366 +struct arpheader 33.367 +{ 33.368 + unsigned short ar_hrd; /* format of hardware address */ 33.369 + unsigned short ar_pro; /* format of protocol address */ 33.370 + unsigned char ar_hln; /* length of hardware address */ 33.371 + unsigned char ar_pln; /* length of protocol address */ 33.372 + unsigned short ar_op; /* ARP opcode (command) */ 33.373 + 33.374 +#if 1 33.375 + /* 33.376 + * Ethernet looks like this : This bit is variable sized however... 33.377 + */ 33.378 + unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ 33.379 + unsigned char ar_sip[4]; /* sender IP address */ 33.380 + unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ 33.381 + unsigned char ar_tip[4]; /* target IP address */ 33.382 +#endif 33.383 + 33.384 +}; 33.385 + 33.386 +void print_skb_data(char *msg, int count, struct sk_buff *skb, u8 *data, int len) 33.387 +{ 33.388 + static int skb_count = 1000000; 33.389 + u8 *ptr, *end; 33.390 + u32 src_addr, dst_addr; 33.391 + // Transport layer header. 
33.392 + union { 33.393 + struct tcphdr *th; 33.394 + struct udphdr *uh; 33.395 + struct icmphdr *icmph; 33.396 + struct igmphdr *igmph; 33.397 + struct iphdr *ipiph; 33.398 + unsigned char *raw; 33.399 + } h; 33.400 + // Network layer header. 33.401 + union { 33.402 + struct iphdr *iph; 33.403 + struct ipv6hdr *ipv6h; 33.404 + struct arpheader *arph; 33.405 + struct ipxhdr *ipxh; 33.406 + unsigned char *raw; 33.407 + } nh; 33.408 + // Link layer header. 33.409 + union { 33.410 + struct ethhdr *ethernet; 33.411 + unsigned char *raw; 33.412 + } mac; 33.413 + int protocol; 33.414 + if(!count) count = ++skb_count; 33.415 + if(!msg) msg = (char *)__FUNCTION__; 33.416 + if(!data){ 33.417 + printk("%s.%d> null data\n", msg, count); 33.418 + return; 33.419 + } 33.420 + ptr = data; 33.421 + end = data + len; 33.422 + mac.raw = ptr; 33.423 + ptr += sizeof(struct ethhdr); 33.424 + if(ptr > end){ printk("***MAC:"); goto exit; } 33.425 + protocol = ntohs(mac.ethernet->h_proto); 33.426 + nh.raw = ptr; 33.427 + 33.428 + printk("%s.%d> type=%d protocol=0x%x\n", 33.429 + msg, count, skb->pkt_type, htons(skb->protocol)); 33.430 + if(1){ 33.431 + printk("%s.%d> %p mac src=" MACFMT " dst=" MACFMT "\n", 33.432 + msg, count, data, 33.433 + MAC6TUPLE(mac.ethernet->h_source), 33.434 + MAC6TUPLE(mac.ethernet->h_dest)); 33.435 + } 33.436 + 33.437 + switch(protocol){ 33.438 + case ETH_P_ARP: 33.439 + ptr += sizeof(struct arpheader); 33.440 + if(ptr > end){ printk("***ARP:"); goto exit; } 33.441 + if(0){ 33.442 + printk("%s.%d> ARP hrd=%d, pro=%d, hln=%d, pln=%d, op=%d\n", 33.443 + msg, count, 33.444 + nh.arph->ar_hrd, nh.arph->ar_pro, nh.arph->ar_hln, 33.445 + nh.arph->ar_pln, nh.arph->ar_op); 33.446 + } 33.447 + memcpy(&src_addr, nh.arph->ar_sip, 4); 33.448 + src_addr = ntohl(src_addr); 33.449 + memcpy(&dst_addr, nh.arph->ar_tip, 4); 33.450 + dst_addr = ntohl(dst_addr); 33.451 + printk("%s.%d> ARP HW src=" MACFMT " dst=" MACFMT "\n", 33.452 + msg, count, MAC6TUPLE(nh.arph->ar_sha), 
MAC6TUPLE(nh.arph->ar_tha)); 33.453 + printk("%s.%d> ARP IP src=" IPFMT " dst=" IPFMT "\n", 33.454 + msg, count, HIPQUAD(src_addr), HIPQUAD(dst_addr)); 33.455 + break; 33.456 + case ETH_P_IP: { 33.457 + u16 src_port, dst_port; 33.458 + if(ptr + sizeof(struct iphdr) > end){ printk("***IP:"); goto exit; } 33.459 + src_addr = ntohl(nh.iph->saddr); 33.460 + dst_addr = ntohl(nh.iph->daddr); 33.461 + if(1){ 33.462 + printk("%s.%d> IP proto=%d src=" IPFMT " dst=" IPFMT "\n", 33.463 + msg, count, nh.iph->protocol, 33.464 + HIPQUAD(src_addr), HIPQUAD(dst_addr)); 33.465 + printk("%s.%d> IP tot_len=%u len=%d\n", 33.466 + msg, count, nh.iph->tot_len & 0xffff, len - ETH_HLEN); 33.467 + } 33.468 + ptr += (nh.iph->ihl * 4); 33.469 + if(ptr > end){ printk ("***IP: len"); goto exit; } 33.470 + h.raw = ptr; 33.471 + switch(nh.iph->protocol){ 33.472 + case IPPROTO_TCP: 33.473 + ptr += sizeof(struct tcphdr); 33.474 + if(ptr > end){ printk("***TCP:"); goto exit; } 33.475 + src_port = ntohs(h.th->source); 33.476 + dst_port = ntohs(h.th->dest); 33.477 + printk("%s.%d> TCP src=" IPFMT ":%u dst=" IPFMT ":%u\n", 33.478 + msg, count, 33.479 + HIPQUAD(src_addr), src_port, 33.480 + HIPQUAD(dst_addr), dst_port); 33.481 + break; 33.482 + case IPPROTO_UDP: 33.483 + ptr += sizeof(struct udphdr); 33.484 + if(ptr > end){ printk("***UDP:"); goto exit; } 33.485 + src_port = ntohs(h.uh->source); 33.486 + dst_port = ntohs(h.uh->dest); 33.487 + printk("%s.%d> UDP src=" IPFMT ":%u dst=" IPFMT ":%u\n", 33.488 + msg, count, 33.489 + HIPQUAD(src_addr), src_port, 33.490 + HIPQUAD(dst_addr), dst_port); 33.491 + break; 33.492 + default: 33.493 + printk("%s.%d> IP %d src=" IPFMT " dst=" IPFMT "\n", 33.494 + msg, count, 33.495 + nh.iph->protocol, HIPQUAD(src_addr), HIPQUAD(dst_addr)); 33.496 + break; 33.497 + } 33.498 + break; } 33.499 + case ETH_P_IPV6: 33.500 + printk("%s.%d> IPv6\n", msg, count); 33.501 + break; 33.502 + case ETH_P_IPX: 33.503 + printk("%s.%d> IPX\n", msg, count); 33.504 + break; 33.505 + 
default: 33.506 + printk("%s.%d> protocol=%d\n", msg, count, protocol); 33.507 + break; 33.508 + } 33.509 + return; 33.510 + exit: 33.511 + printk("%s.%d> %s: skb problem\n", msg, count, __FUNCTION__); 33.512 + printk("%s.%d> %s: data=%p end=%p(%d) ptr=%p(%d) eth=%d arp=%d ip=%d\n", 33.513 + msg, count, __FUNCTION__, 33.514 + data, end, end - data, ptr, ptr - data, 33.515 + sizeof(struct ethhdr), sizeof(struct arphdr), sizeof(struct iphdr)); 33.516 + return; 33.517 +} 33.518 +
34.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 34.2 +++ b/tools/vnet/vnet-module/skb_util.h Mon Nov 22 16:49:15 2004 +0000 34.3 @@ -0,0 +1,43 @@ 34.4 +/* 34.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 34.6 + * 34.7 + * This program is free software; you can redistribute it and/or modify 34.8 + * it under the terms of the GNU General Public License as published by the 34.9 + * Free Software Foundation; either version 2 of the License, or (at your 34.10 + * option) any later version. 34.11 + * 34.12 + * This program is distributed in the hope that it will be useful, but 34.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 34.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 34.15 + * for more details. 34.16 + * 34.17 + * You should have received a copy of the GNU General Public License along 34.18 + * with this program; if not, write to the Free software Foundation, Inc., 34.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 34.20 + * 34.21 + */ 34.22 +#ifndef _VNET_SKB_UTIL_H_ 34.23 +#define _VNET_SKB_UTIL_H_ 34.24 + 34.25 +struct sk_buff; 34.26 +struct scatterlist; 34.27 + 34.28 +extern int skb_make_room(struct sk_buff **pskb, struct sk_buff *skb, int head_n, int tail_n); 34.29 + 34.30 +extern int skb_put_bits(const struct sk_buff *skb, int offset, void *src, int len); 34.31 + 34.32 +extern int pskb_put(struct sk_buff *skb, int n); 34.33 + 34.34 +extern void skb_print_bits(struct sk_buff *skb, int offset, int n); 34.35 + 34.36 +extern void buf_print(char *buf, int n); 34.37 + 34.38 +extern void *skb_trim_tail(struct sk_buff *skb, int n); 34.39 + 34.40 +extern int skb_scatterlist(struct sk_buff *skb, struct scatterlist *sg, 34.41 + int *sg_n, int offset, int len); 34.42 + 34.43 +extern void print_skb_data(char *msg, int count, struct sk_buff *skb, u8 *data, int len); 34.44 + 34.45 + 34.46 +#endif
35.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 35.2 +++ b/tools/vnet/vnet-module/tunnel.c Mon Nov 22 16:49:15 2004 +0000 35.3 @@ -0,0 +1,228 @@ 35.4 +/* 35.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 35.6 + * 35.7 + * This program is free software; you can redistribute it and/or modify 35.8 + * it under the terms of the GNU General Public License as published by the 35.9 + * Free Software Foundation; either version 2 of the License, or (at your 35.10 + * option) any later version. 35.11 + * 35.12 + * This program is distributed in the hope that it will be useful, but 35.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 35.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 35.15 + * for more details. 35.16 + * 35.17 + * You should have received a copy of the GNU General Public License along 35.18 + * with this program; if not, write to the Free software Foundation, Inc., 35.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 35.20 + * 35.21 + */ 35.22 +#include <linux/config.h> 35.23 +#include <linux/module.h> 35.24 +#include <linux/init.h> 35.25 + 35.26 +#include <linux/net.h> 35.27 +#include <linux/in.h> 35.28 +#include <linux/inet.h> 35.29 +#include <linux/netdevice.h> 35.30 + 35.31 +#include <net/ip.h> 35.32 +#include <net/protocol.h> 35.33 +#include <net/route.h> 35.34 +#include <linux/skbuff.h> 35.35 + 35.36 +#include <tunnel.h> 35.37 +#include <vnet.h> 35.38 +#include <varp.h> 35.39 +#include "hash_table.h" 35.40 + 35.41 +#define MODULE_NAME "VNET" 35.42 +//#define DEBUG 1 35.43 +#undef DEBUG 35.44 +#include "debug.h" 35.45 + 35.46 +void Tunnel_print(Tunnel *tunnel){ 35.47 + if(tunnel){ 35.48 + printk("Tunnel<%p base=%p ref=%02d type=%s>\n", 35.49 + tunnel, 35.50 + tunnel->base, 35.51 + atomic_read(&tunnel->refcount), 35.52 + tunnel->type->name); 35.53 + if(tunnel->base){ 35.54 + Tunnel_print(tunnel->base); 35.55 + } 35.56 + } else { 35.57 + printk("Tunnel<%p base=%p ref=%02d 
type=%s>\n", 35.58 + NULL, NULL, 0, "ip"); 35.59 + } 35.60 +} 35.61 + 35.62 +int Tunnel_create(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **val){ 35.63 + int err = 0; 35.64 + Tunnel *tunnel = NULL; 35.65 + dprintf("> type=%s vnet=%d addr=" IPFMT " base=%s\n", 35.66 + type->name, vnet, NIPQUAD(addr), (base ? base->type->name : "ip")); 35.67 + if(!type || !type->open || !type->send || !type->close){ 35.68 + err = -EINVAL; 35.69 + goto exit; 35.70 + } 35.71 + tunnel = kmalloc(sizeof(Tunnel), GFP_ATOMIC); 35.72 + if(!tunnel){ 35.73 + err = -ENOMEM; 35.74 + goto exit; 35.75 + } 35.76 + atomic_set(&tunnel->refcount, 1); 35.77 + tunnel->key.vnet = vnet; 35.78 + tunnel->key.addr = addr; 35.79 + tunnel->type = type; 35.80 + tunnel->data = NULL; 35.81 + tunnel->send_stats = (TunnelStats){}; 35.82 + Tunnel_incref(base); 35.83 + tunnel->base = base; 35.84 + err = type->open(tunnel); 35.85 + exit: 35.86 + if(err && tunnel){ 35.87 + Tunnel_decref(tunnel); 35.88 + tunnel = NULL; 35.89 + } 35.90 + *val = tunnel; 35.91 + dprintf("< err=%d\n", err); 35.92 + return err; 35.93 +} 35.94 + 35.95 +int Tunnel_open(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnel){ 35.96 + int err = 0; 35.97 + 35.98 + dprintf(">\n"); 35.99 + err = Tunnel_create(type, vnet, addr, base, tunnel); 35.100 + if(err) goto exit; 35.101 + err = Tunnel_add(*tunnel); 35.102 + exit: 35.103 + if(err){ 35.104 + Tunnel_decref(*tunnel); 35.105 + *tunnel = NULL; 35.106 + } 35.107 + dprintf("< err=%d\n", err); 35.108 + return err; 35.109 +} 35.110 + 35.111 +void TunnelStats_update(TunnelStats *stats, int len, int err){ 35.112 + dprintf(">len=%d err=%d\n", len, err); 35.113 + if(err){ 35.114 + stats->dropped_bytes += len; 35.115 + stats->dropped_packets++; 35.116 + } else { 35.117 + stats->bytes += len; 35.118 + stats->packets++; 35.119 + } 35.120 + dprintf("<\n"); 35.121 +} 35.122 + 35.123 +/** Table of tunnels, indexed by vnet and addr. 
*/ 35.124 +HashTable *tunnel_table = NULL; 35.125 + 35.126 +static inline Hashcode tunnel_table_key_hash_fn(void *k){ 35.127 + TunnelKey *key = k; 35.128 + Hashcode h = 0; 35.129 + h = hash_2ul(key->vnet, key->addr); 35.130 + return h; 35.131 +} 35.132 + 35.133 +static int tunnel_table_key_equal_fn(void *k1, void *k2){ 35.134 + TunnelKey *key1 = k1; 35.135 + TunnelKey *key2 = k2; 35.136 + return (key1->vnet == key2->vnet) 35.137 + && (key1->addr == key2->addr); 35.138 +} 35.139 + 35.140 +static void tunnel_table_entry_free_fn(HashTable *table, HTEntry *entry){ 35.141 + Tunnel *tunnel; 35.142 + if(!entry) return; 35.143 + tunnel = entry->value; 35.144 + //dprintf(">\n"); Tunnel_print(tunnel); 35.145 + Tunnel_decref(tunnel); 35.146 + HTEntry_free(entry); 35.147 +} 35.148 + 35.149 +int Tunnel_init(void){ 35.150 + int err = 0; 35.151 + dprintf(">\n"); 35.152 + tunnel_table = HashTable_new(0); 35.153 + if(!tunnel_table){ 35.154 + err = -ENOMEM; 35.155 + goto exit; 35.156 + } 35.157 + tunnel_table->entry_free_fn = tunnel_table_entry_free_fn; 35.158 + tunnel_table->key_hash_fn = tunnel_table_key_hash_fn; 35.159 + tunnel_table->key_equal_fn = tunnel_table_key_equal_fn; 35.160 + exit: 35.161 + dprintf("< err=%d\n", err); 35.162 + return err; 35.163 +} 35.164 + 35.165 +/** Lookup tunnel state by vnet and destination. 
35.166 + * 35.167 + * @param vnet vnet 35.168 + * @param addr destination address 35.169 + * @return tunnel state or NULL 35.170 + */ 35.171 +Tunnel * Tunnel_lookup(u32 vnet, u32 addr){ 35.172 + Tunnel *tunnel = NULL; 35.173 + TunnelKey key = {.vnet = vnet, .addr = addr }; 35.174 + dprintf(">\n"); 35.175 + tunnel = HashTable_get(tunnel_table, &key); 35.176 + Tunnel_incref(tunnel); 35.177 + dprintf("< tunnel=%p\n", tunnel); 35.178 + return tunnel; 35.179 +} 35.180 + 35.181 +int Tunnel_add(Tunnel *tunnel){ 35.182 + int err = 0; 35.183 + dprintf(">\n"); 35.184 + if(HashTable_add(tunnel_table, tunnel, tunnel)){ 35.185 + Tunnel_incref(tunnel); 35.186 + } else { 35.187 + err = -ENOMEM; 35.188 + } 35.189 + dprintf("< err=%d\n", err); 35.190 + return err; 35.191 +} 35.192 + 35.193 +int Tunnel_del(Tunnel *tunnel){ 35.194 + return HashTable_remove(tunnel_table, tunnel); 35.195 +} 35.196 + 35.197 +/** Do tunnel send processing on a packet. 35.198 + * 35.199 + * @param tunnel tunnel state 35.200 + * @param skb packet 35.201 + * @return 0 on success, error code otherwise 35.202 + */ 35.203 +int Tunnel_send(Tunnel *tunnel, struct sk_buff *skb){ 35.204 + int err = 0; 35.205 + int len; 35.206 + dprintf("> tunnel=%p skb=%p\n", tunnel, skb); 35.207 + len = skb->len; 35.208 + if(tunnel){ 35.209 + dprintf("> type=%s type->send...\n", tunnel->type->name); 35.210 + err = tunnel->type->send(tunnel, skb); 35.211 + // Must not refer to skb after sending - might have been freed. 
35.212 + TunnelStats_update(&tunnel->send_stats, len, err); 35.213 + } else { 35.214 + struct net_device *dev = NULL; 35.215 + err = vnet_get_device(DEVICE, &dev); 35.216 + if(err) goto exit; 35.217 + skb->dev = dev; 35.218 + err = skb_xmit(skb); 35.219 + dev_put(dev); 35.220 + } 35.221 + exit: 35.222 + dprintf("< err=%d\n", err); 35.223 + return err; 35.224 +} 35.225 + 35.226 +int __init tunnel_module_init(void){ 35.227 + return Tunnel_init(); 35.228 +} 35.229 + 35.230 +void __exit tunnel_module_exit(void){ 35.231 +}
36.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 36.2 +++ b/tools/vnet/vnet-module/tunnel.h Mon Nov 22 16:49:15 2004 +0000 36.3 @@ -0,0 +1,101 @@ 36.4 +/* 36.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 36.6 + * 36.7 + * This program is free software; you can redistribute it and/or modify 36.8 + * it under the terms of the GNU General Public License as published by the 36.9 + * Free Software Foundation; either version 2 of the License, or (at your 36.10 + * option) any later version. 36.11 + * 36.12 + * This program is distributed in the hope that it will be useful, but 36.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 36.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 36.15 + * for more details. 36.16 + * 36.17 + * You should have received a copy of the GNU General Public License along 36.18 + * with this program; if not, write to the Free software Foundation, Inc., 36.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 36.20 + * 36.21 + */ 36.22 +#ifndef __VNET_TUNNEL_H__ 36.23 +#define __VNET_TUNNEL_H__ 36.24 + 36.25 +#include <linux/types.h> 36.26 +#include <linux/slab.h> 36.27 +#include <asm/atomic.h> 36.28 + 36.29 +struct sk_buff; 36.30 +struct Tunnel; 36.31 + 36.32 +typedef struct TunnelType { 36.33 + const char *name; 36.34 + int (*open)(struct Tunnel *tunnel); 36.35 + int (*send)(struct Tunnel *tunnel, struct sk_buff *skb); 36.36 + void (*close)(struct Tunnel *tunnel); 36.37 +} TunnelType; 36.38 + 36.39 +typedef struct TunnelStats { 36.40 + int bytes; 36.41 + int packets; 36.42 + int dropped_bytes; 36.43 + int dropped_packets; 36.44 +} TunnelStats; 36.45 + 36.46 +typedef struct TunnelKey { 36.47 + u32 vnet; 36.48 + u32 addr; 36.49 +} TunnelKey; 36.50 + 36.51 +typedef struct Tunnel { 36.52 + /** Key identifying the tunnel. Must be first. */ 36.53 + struct TunnelKey key; 36.54 + /** Reference count. */ 36.55 + atomic_t refcount; 36.56 + /** Tunnel type. 
*/ 36.57 + struct TunnelType *type; 36.58 + /** Statistics. */ 36.59 + struct TunnelStats send_stats; 36.60 + /** Type-dependent state. */ 36.61 + void *data; 36.62 + /** Underlying tunnel (may be null). */ 36.63 + struct Tunnel *base; 36.64 +} Tunnel; 36.65 + 36.66 +extern void Tunnel_print(Tunnel *tunnel); 36.67 + 36.68 +/** Decrement the reference count, freeing if zero. 36.69 + * 36.70 + * @param tunnel tunnel (may be null) 36.71 + */ 36.72 +static inline void Tunnel_decref(Tunnel *tunnel){ 36.73 + if(!tunnel) return; 36.74 + if(atomic_dec_and_test(&tunnel->refcount)){ 36.75 + printk("%s> Closing tunnel:\n", __FUNCTION__); 36.76 + Tunnel_print(tunnel); 36.77 + tunnel->type->close(tunnel); 36.78 + Tunnel_decref(tunnel->base); 36.79 + kfree(tunnel); 36.80 + } 36.81 +} 36.82 + 36.83 +/** Increment the reference count. 36.84 + * 36.85 + * @param tunnel tunnel (may be null) 36.86 + */ 36.87 +static inline void Tunnel_incref(Tunnel *tunnel){ 36.88 + if(!tunnel) return; 36.89 + atomic_inc(&tunnel->refcount); 36.90 +} 36.91 + 36.92 +extern int Tunnel_init(void); 36.93 +extern Tunnel * Tunnel_lookup(u32 vnet, u32 addr); 36.94 +extern int Tunnel_add(Tunnel *tunnel); 36.95 +extern int Tunnel_del(Tunnel *tunnel); 36.96 +extern int Tunnel_send(Tunnel *tunnel, struct sk_buff *skb); 36.97 + 36.98 +extern int Tunnel_create(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnelp); 36.99 +extern int Tunnel_open(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnelp); 36.100 + 36.101 +extern int tunnel_module_init(void); 36.102 +extern void tunnel_module_exit(void); 36.103 + 36.104 +#endif /* !__VNET_TUNNEL_H__ */
37.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 37.2 +++ b/tools/vnet/vnet-module/varp.c Mon Nov 22 16:49:15 2004 +0000 37.3 @@ -0,0 +1,1236 @@ 37.4 +/* 37.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 37.6 + * 37.7 + * This program is free software; you can redistribute it and/or modify 37.8 + * it under the terms of the GNU General Public License as published by the 37.9 + * Free Software Foundation; either version 2 of the License, or (at your 37.10 + * option) any later version. 37.11 + * 37.12 + * This program is distributed in the hope that it will be useful, but 37.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 37.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 37.15 + * for more details. 37.16 + * 37.17 + * You should have received a copy of the GNU General Public License along 37.18 + * with this program; if not, write to the Free software Foundation, Inc., 37.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 37.20 + * 37.21 + */ 37.22 + 37.23 +#include <linux/config.h> 37.24 +#include <linux/kernel.h> 37.25 +#include <linux/module.h> 37.26 +#include <linux/init.h> 37.27 +#include <linux/string.h> 37.28 +#include <linux/version.h> 37.29 + 37.30 +#include <linux/net.h> 37.31 +#include <linux/in.h> 37.32 +#include <linux/inet.h> 37.33 +#include <linux/netdevice.h> 37.34 +#include <linux/udp.h> 37.35 + 37.36 +#include <net/ip.h> 37.37 +#include <net/protocol.h> 37.38 +#include <net/route.h> 37.39 +#include <linux/skbuff.h> 37.40 +#include <linux/spinlock.h> 37.41 +#include <asm/semaphore.h> 37.42 + 37.43 +#include <tunnel.h> 37.44 +#include <vnet.h> 37.45 +#include <vif.h> 37.46 +#include <varp.h> 37.47 +#include <if_varp.h> 37.48 + 37.49 +#include "allocate.h" 37.50 +#include "hash_table.h" 37.51 +#include "sys_net.h" 37.52 +#include "sys_string.h" 37.53 + 37.54 +#define MODULE_NAME "VARP" 37.55 +//#define DEBUG 1 37.56 +#undef DEBUG 37.57 +#include "debug.h" 37.58 + 37.59 
+ 37.60 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 37.61 +// The 'ethernet' field in the skb->mac union went away. 37.62 +#define MAC_ETH(_skb) ((struct ethhdr *)(_skb)->mac.raw) 37.63 +#else 37.64 +#define MAC_ETH(_skb) ((_skb)->mac.ethernet) 37.65 +#endif 37.66 + 37.67 +/** @file VARP: Virtual ARP. 37.68 + * 37.69 + * Handles virtual ARP requests for vnet/vmac. 37.70 + */ 37.71 + 37.72 +/* 37.73 + 37.74 +Varp uses UDP on port 1798. 37.75 + 37.76 +on domain up: ? 37.77 + send varp.announce { id, vmac, vnet, coa } for each vif 37.78 + that haven't announced before, or has changed. 37.79 + install vif entries in local table. 37.80 + 37.81 +on varp.announce{ id, vmac, vnet, coa }: 37.82 + update VARP entry for vmac x vnet if have one, reset ttl. 37.83 + 37.84 +on varp.request { id, vmac, vnet }: 37.85 + if have a vif for the requested vmac/vnet, 37.86 + reply with varp.announce{ id, vmac, vnet, coa } 37.87 + 37.88 +on timer: 37.89 + traverse VARP table, flush old entries. 37.90 + 37.91 +on probe timer: 37.92 + probe again if not out of tries. 37.93 + if out of tries invalidate entry. 37.94 + 37.95 +*/ 37.96 + 37.97 +/** Time-to-live of varp entries (in jiffies).*/ 37.98 +#define VARP_ENTRY_TTL (60*HZ) 37.99 + 37.100 +/** Maximum number of varp probes to make. */ 37.101 +#define VARP_PROBE_MAX 5 37.102 + 37.103 +/** Interval between varp probes (in jiffies). */ 37.104 +#define VARP_PROBE_INTERVAL (3*HZ) 37.105 + 37.106 +/** Maximum number of queued skbs for a varp entry. */ 37.107 +#define VARP_QUEUE_MAX 16 37.108 + 37.109 +/** Number of buckets in the varp table (must be prime). */ 37.110 +#define VARP_TABLE_BUCKETS 3001 37.111 + 37.112 +/** Varp entry states. */ 37.113 +enum { 37.114 + VARP_STATE_INCOMPLETE = 1, 37.115 + VARP_STATE_REACHABLE = 2, 37.116 + VARP_STATE_FAILED = 3 37.117 +}; 37.118 + 37.119 +/** Varp entry flags. 
*/ 37.120 +enum { 37.121 + VARP_FLAG_PROBING = 1, 37.122 + VARP_FLAG_PERMANENT = 2, 37.123 +}; 37.124 + 37.125 +/** Key for varp entries. */ 37.126 +typedef struct VarpKey { 37.127 + /** Vnet id (host order). */ 37.128 + u32 vnet; 37.129 + /** Virtual MAC address. */ 37.130 + Vmac vmac; 37.131 +} VarpKey; 37.132 + 37.133 +/** An entry in the varp cache. */ 37.134 +typedef struct VarpEntry { 37.135 + /** Key for the entry. */ 37.136 + VarpKey key; 37.137 + /** Care-of address for the key. */ 37.138 + u32 addr; 37.139 + /** Last-updated timestamp. */ 37.140 + unsigned long timestamp; 37.141 + /** State. */ 37.142 + short state; 37.143 + /** Flags. */ 37.144 + short flags; 37.145 + /** Reference count. */ 37.146 + atomic_t refcount; 37.147 + /** Lock. */ 37.148 + rwlock_t lock; 37.149 + /** How many probes have been made. */ 37.150 + atomic_t probes; 37.151 + /** Probe timer. */ 37.152 + struct timer_list timer; 37.153 + void (*error)(struct VarpEntry *ventry, struct sk_buff *skb); 37.154 + /** Outbound skb queue. */ 37.155 + struct sk_buff_head queue; 37.156 + /** Maximum size of the queue. */ 37.157 + int queue_max; 37.158 + 37.159 + int locks; 37.160 +} VarpEntry; 37.161 + 37.162 +/** The varp cache. Varp entries indexed by VarpKey. */ 37.163 +typedef struct VarpTable { 37.164 + 37.165 + HashTable *table; 37.166 + 37.167 + /** Sweep timer. */ 37.168 + struct timer_list timer; 37.169 + 37.170 + /** Lock. Need to use a semaphore instead of a spinlock because 37.171 + * some operations under the varp table lock can schedule - and 37.172 + * you mustn't hold a spinlock when scheduling. 37.173 + */ 37.174 + struct semaphore lock; 37.175 + 37.176 +} VarpTable; 37.177 + 37.178 +/** The varp cache. */ 37.179 +static VarpTable *varp_table = NULL; 37.180 + 37.181 +/** Module parameter for the multicast address. */ 37.182 +static char *varp_mcaddr = NULL; 37.183 + 37.184 +/** Multicast address (network order). 
*/ 37.185 +u32 varp_mcast_addr = 0; 37.186 + 37.187 +/** Unicast address (network order). */ 37.188 +u32 varp_ucast_addr = 0; 37.189 + 37.190 +/** UDP port (network order). */ 37.191 +u16 varp_port = 0; 37.192 + 37.193 +/** Network device to use. */ 37.194 +char *varp_device = DEVICE; 37.195 + 37.196 +#define VarpTable_read_lock(z, flags) do{ (flags) = 0; down(&(z)->lock); } while(0) 37.197 +#define VarpTable_read_unlock(z, flags) do{ (flags) = 0; up(&(z)->lock); } while(0) 37.198 +#define VarpTable_write_lock(z, flags) do{ (flags) = 0; down(&(z)->lock); } while(0) 37.199 +#define VarpTable_write_unlock(z, flags) do{ (flags) = 0; up(&(z)->lock); } while(0) 37.200 + 37.201 +#define VarpEntry_lock(ventry, flags) write_lock_irqsave(&(ventry)->lock, (flags)) 37.202 +#define VarpEntry_unlock(ventry, flags) write_unlock_irqrestore(&(ventry)->lock, (flags)) 37.203 + 37.204 +void VarpTable_sweep(VarpTable *z, int all); 37.205 +void VarpTable_print(VarpTable *z); 37.206 + 37.207 +/** Print the varp cache (if debug on). 37.208 + */ 37.209 +void varp_dprint(void){ 37.210 +#ifdef DEBUG 37.211 + VarpTable_print(varp_table); 37.212 +#endif 37.213 +} 37.214 + 37.215 +/** Print varp info and the varp cache. 37.216 + */ 37.217 +void varp_print(void){ 37.218 + printk(KERN_INFO "=== VARP ===============================================================\n"); 37.219 + printk(KERN_INFO "varp_device %s\n", varp_device); 37.220 + printk(KERN_INFO "varp_mcast_addr " IPFMT "\n", NIPQUAD(varp_mcast_addr)); 37.221 + printk(KERN_INFO "varp_ucast_addr " IPFMT "\n", NIPQUAD(varp_ucast_addr)); 37.222 + printk(KERN_INFO "varp_port %d\n", ntohs(varp_port)); 37.223 + VarpTable_print(varp_table); 37.224 + printk(KERN_INFO "========================================================================\n"); 37.225 +} 37.226 + 37.227 +/** Lookup a network device by name. 
37.228 + * 37.229 + * @param name device name 37.230 + * @param dev return parameter for the device 37.231 + * @return 0 on success, error code otherwise 37.232 + */ 37.233 +int vnet_get_device(const char *name, struct net_device **dev){ 37.234 + int err = 0; 37.235 + *dev = dev_get_by_name(name); 37.236 + if(!*dev){ 37.237 + err = -ENETDOWN; 37.238 + } 37.239 + return err; 37.240 +} 37.241 + 37.242 +/** Get the source address from a device. 37.243 + * 37.244 + * @param dev device 37.245 + * @param addr return parameter for address 37.246 + * @return 0 on success, error code otherwise 37.247 + */ 37.248 +int vnet_get_device_address(struct net_device *dev, u32 *addr){ 37.249 + int err = 0; 37.250 + struct in_device *in_dev; 37.251 + 37.252 + //printk("%s>\n", __FUNCTION__); 37.253 + in_dev = in_dev_get(dev); 37.254 + if(!in_dev){ 37.255 + err = -EIO; 37.256 + goto exit; 37.257 + } 37.258 + *addr = in_dev->ifa_list->ifa_address; 37.259 + in_dev_put(in_dev); 37.260 + exit: 37.261 + //printk("%s< err=%d\n", __FUNCTION__, err); 37.262 + return err; 37.263 +} 37.264 + 37.265 +#ifndef LL_RESERVED_SPACE 37.266 +#define HH_DATA_MOD 16 37.267 +#define LL_RESERVED_SPACE(dev) \ 37.268 + ((dev->hard_header_len & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD) 37.269 +#endif 37.270 + 37.271 +/** Send a varp protocol message. 
37.272 + * 37.273 + * @param opcode varp opcode (host order) 37.274 + * @param dev device (may be null) 37.275 + * @param skb skb being replied to (may be null) 37.276 + * @param vnet vnet id (in host order) 37.277 + * @param vmac vmac (in network order) 37.278 + * @return 0 on success, error code otherwise 37.279 + */ 37.280 +int varp_send(u16 opcode, struct net_device *dev, struct sk_buff *skbin, 37.281 + u32 vnet, Vmac *vmac){ 37.282 + int err = 0; 37.283 + int link_n = 0; 37.284 + int ip_n = sizeof(struct iphdr); 37.285 + int udp_n = sizeof(struct udphdr); 37.286 + int varp_n = sizeof(VarpHdr); 37.287 + struct sk_buff *skbout = NULL; 37.288 + struct in_device *in_dev = NULL; 37.289 + VarpHdr *varph = NULL; 37.290 + u8 macbuf[6] = {}; 37.291 + u8 *smac, *dmac; 37.292 + u32 saddr, daddr; 37.293 + u16 sport, dport; 37.294 + 37.295 + dmac = macbuf; 37.296 + dprintf("> opcode=%d vnet=%d vmac=" MACFMT "\n", 37.297 + opcode, ntohl(vnet), MAC6TUPLE(vmac->mac)); 37.298 + if(!dev){ 37.299 + //todo: should use routing for daddr to get device. 
37.300 + err = vnet_get_device(varp_device, &dev); 37.301 + if(err) goto exit; 37.302 + } 37.303 + link_n = LL_RESERVED_SPACE(dev); 37.304 + in_dev = in_dev_get(dev); 37.305 + if(!in_dev) goto exit; 37.306 + 37.307 + smac = dev->dev_addr; 37.308 + saddr = in_dev->ifa_list->ifa_address; 37.309 + 37.310 + if(skbin){ 37.311 + dmac = MAC_ETH(skbin)->h_source; 37.312 + sport = skbin->h.uh->dest; 37.313 + daddr = skbin->nh.iph->saddr; 37.314 + //dport = skbin->h.uh->source; 37.315 + dport = varp_port; 37.316 + } else { 37.317 + if(!in_dev) goto exit; 37.318 + if(MULTICAST(varp_mcast_addr)){ 37.319 + daddr = varp_mcast_addr; 37.320 + ip_eth_mc_map(daddr, dmac); 37.321 + } else { 37.322 + daddr = in_dev->ifa_list->ifa_broadcast; 37.323 + dmac = dev->broadcast; 37.324 + } 37.325 + sport = varp_port; 37.326 + dport = varp_port; 37.327 + } 37.328 + in_dev_put(in_dev); 37.329 + 37.330 + dprintf("> smac=" MACFMT " dmac=" MACFMT "\n", MAC6TUPLE(smac), MAC6TUPLE(dmac)); 37.331 + dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n", NIPQUAD(saddr), NIPQUAD(daddr)); 37.332 + dprintf("> sport=%u dport=%u\n", ntohs(sport), ntohs(dport)); 37.333 + 37.334 + skbout = alloc_skb(link_n + ip_n + udp_n + varp_n, GFP_ATOMIC); 37.335 + if (!skbout){ 37.336 + err = -ENOMEM; 37.337 + goto exit; 37.338 + } 37.339 + skbout->dev = dev; 37.340 + skb_reserve(skbout, link_n); 37.341 + skbout->protocol = htons(ETH_P_IP); 37.342 + 37.343 + // Device header. Pushes device header on front of skb. 37.344 + if (dev->hard_header){ 37.345 + err = dev->hard_header(skbout, dev, ETH_P_IP, dmac, smac, skbout->len); 37.346 + if(err < 0) goto exit; 37.347 + skbout->mac.raw = skbout->data; 37.348 + } 37.349 + 37.350 + // IP header. 
37.351 + skbout->nh.raw = skb_put(skbout, ip_n); 37.352 + skbout->nh.iph->version = 4; 37.353 + skbout->nh.iph->ihl = ip_n / 4; 37.354 + skbout->nh.iph->tos = 0; 37.355 + skbout->nh.iph->tot_len = htons(ip_n + udp_n + varp_n); 37.356 + skbout->nh.iph->id = 0; 37.357 + skbout->nh.iph->frag_off = 0; 37.358 + skbout->nh.iph->ttl = 64; 37.359 + skbout->nh.iph->protocol = IPPROTO_UDP; 37.360 + skbout->nh.iph->saddr = saddr; 37.361 + skbout->nh.iph->daddr = daddr; 37.362 + skbout->nh.iph->check = 0; 37.363 + 37.364 + // UDP header. 37.365 + skbout->h.raw = skb_put(skbout, udp_n); 37.366 + skbout->h.uh->source = sport; 37.367 + skbout->h.uh->dest = dport; 37.368 + skbout->h.uh->len = htons(udp_n + varp_n); 37.369 + skbout->h.uh->check = 0; 37.370 + 37.371 + // Varp header. 37.372 + varph = (void*)skb_put(skbout, varp_n); 37.373 + *varph = (VarpHdr){}; 37.374 + varph->id = htons(VARP_ID); 37.375 + varph->opcode = htons(opcode); 37.376 + varph->vnet = htonl(vnet); 37.377 + varph->vmac = *vmac; 37.378 + varph->addr = saddr; 37.379 + 37.380 + err = skb_xmit(skbout); 37.381 + 37.382 + exit: 37.383 + if(err && skbout) kfree_skb(skbout); 37.384 + dprintf("< err=%d\n", err); 37.385 + return err; 37.386 +} 37.387 + 37.388 +/** Send a varp request for the vnet and destination mac of a packet. 37.389 + * 37.390 + * @param skb packet 37.391 + * @param vnet vnet (in host order) 37.392 + * @return 0 on success, error code otherwise 37.393 + */ 37.394 +int varp_solicit(struct sk_buff *skb, int vnet){ 37.395 + int err = 0; 37.396 + dprintf("> skb=%p\n", skb); 37.397 + varp_dprint(); 37.398 + err = varp_send(VARP_OP_REQUEST, NULL, NULL, 37.399 + vnet, (Vmac*)MAC_ETH(skb)->h_dest); 37.400 + dprintf("< err=%d\n", err); 37.401 + return err; 37.402 +} 37.403 + 37.404 +/* Test some flags. 
37.405 + * 37.406 + * @param z varp entry 37.407 + * @param flags to test 37.408 + * @return nonzero if flags set 37.409 + */ 37.410 +int VarpEntry_get_flags(VarpEntry *z, int flags){ 37.411 + return z->flags & flags; 37.412 +} 37.413 + 37.414 +/** Set some flags. 37.415 + * 37.416 + * @param z varp entry 37.417 + * @param flags to set 37.418 + * @param set set flags on if nonzero, off if zero 37.419 + * @return new flags value 37.420 + */ 37.421 +int VarpEntry_set_flags(VarpEntry *z, int flags, int set){ 37.422 + if(set){ 37.423 + z->flags |= flags; 37.424 + } else { 37.425 + z->flags &= ~flags; 37.426 + } 37.427 + return z->flags; 37.428 +} 37.429 + 37.430 +/** Print a varp entry. 37.431 + * 37.432 + * @param ventry varp entry 37.433 + */ 37.434 +void VarpEntry_print(VarpEntry *ventry){ 37.435 + if(ventry){ 37.436 + char *c, *d; 37.437 + switch(ventry->state){ 37.438 + case VARP_STATE_INCOMPLETE: c = "INC"; break; 37.439 + case VARP_STATE_REACHABLE: c = "RCH"; break; 37.440 + case VARP_STATE_FAILED: c = "FLD"; break; 37.441 + default: c = "UNK"; break; 37.442 + } 37.443 + d = (VarpEntry_get_flags(ventry, VARP_FLAG_PROBING) ? "P" : " "); 37.444 + 37.445 + printk(KERN_INFO "VENTRY(%p ref=%1d %s %s vnet=%d vmac=" MACFMT " addr=" IPFMT " q=%d t=%lu)\n", 37.446 + ventry, 37.447 + atomic_read(&ventry->refcount), 37.448 + c, d, 37.449 + ventry->key.vnet, 37.450 + MAC6TUPLE(ventry->key.vmac.mac), 37.451 + NIPQUAD(ventry->addr), 37.452 + skb_queue_len(&ventry->queue), 37.453 + ventry->timestamp); 37.454 + } else { 37.455 + printk("VENTRY: Null!\n"); 37.456 + } 37.457 +} 37.458 + 37.459 +/** Free a varp entry. 37.460 + * 37.461 + * @param z varp entry 37.462 + */ 37.463 +void VarpEntry_free(VarpEntry *z){ 37.464 + if(!z) return; 37.465 + deallocate(z); 37.466 +} 37.467 + 37.468 +/** Increment reference count. 
37.469 + * 37.470 + * @param z varp entry (may be null) 37.471 + */ 37.472 +void VarpEntry_incref(VarpEntry *z){ 37.473 + if(!z) return; 37.474 + atomic_inc(&z->refcount); 37.475 + //dprintf("> "); VarpEntry_print(z); 37.476 +} 37.477 + 37.478 +/** Decrement reference count, freeing if zero. 37.479 + * 37.480 + * @param z varp entry (may be null) 37.481 + */ 37.482 +void VarpEntry_decref(VarpEntry *z){ 37.483 + if(!z) return; 37.484 + //dprintf("> "); VarpEntry_print(z); 37.485 + if(atomic_dec_and_test(&z->refcount)){ 37.486 + //dprintf("> freeing %p...\n", z); 37.487 + VarpEntry_free(z); 37.488 + } 37.489 +} 37.490 + 37.491 +/** Call the error handler. 37.492 + * 37.493 + * @param ventry varp entry 37.494 + */ 37.495 +void VarpEntry_error(VarpEntry *ventry){ 37.496 + struct sk_buff *skb; 37.497 + skb = skb_peek(&ventry->queue); 37.498 + if(!skb) return; 37.499 + if(ventry->error) ventry->error(ventry, skb); 37.500 + skb_queue_purge(&ventry->queue); 37.501 +} 37.502 + 37.503 +/** Schedule the varp entry timer. 37.504 + * Must increment the reference count before doing 37.505 + * this the first time, so the ventry won' be freed 37.506 + * before the timer goes off. 37.507 + * 37.508 + * @param ventry varp entry 37.509 + */ 37.510 +void VarpEntry_schedule(VarpEntry *ventry){ 37.511 + unsigned long now = jiffies; 37.512 + ventry->timer.expires = now + VARP_PROBE_INTERVAL; 37.513 + add_timer(&ventry->timer); 37.514 +} 37.515 + 37.516 +/** Function called when a varp entry timer goes off. 37.517 + * If the entry is still incomplete, carries on probing. 37.518 + * Otherwise stops probing. 
37.519 + * 37.520 + * @param arg ventry 37.521 + */ 37.522 +static void varp_timer_fn(unsigned long arg){ 37.523 + unsigned long flags; 37.524 + VarpEntry *ventry = (VarpEntry *)arg; 37.525 + struct sk_buff *skb = NULL; 37.526 + int locked = 0, probing = 0; 37.527 + 37.528 + dprintf(">\n"); //VarpEntry_print(ventry); 37.529 + VarpEntry_lock(ventry, flags); 37.530 + locked = 1; 37.531 + if(ventry->state == VARP_STATE_REACHABLE){ 37.532 + // Do nothing. 37.533 + } else { 37.534 + // Probe if haven't run out of tries, otherwise fail. 37.535 + if(atomic_read(&ventry->probes) < VARP_PROBE_MAX){ 37.536 + probing = 1; 37.537 + VarpEntry_schedule(ventry); 37.538 + skb = skb_peek(&ventry->queue); 37.539 + if(skb){ 37.540 + dprintf("> skbs in queue - solicit\n"); 37.541 + atomic_inc(&ventry->probes); 37.542 + VarpEntry_unlock(ventry, flags); 37.543 + locked = 0; 37.544 + varp_solicit(skb, ventry->key.vnet); 37.545 + } else { 37.546 + dprintf("> empty queue.\n"); 37.547 + } 37.548 + } else { 37.549 + dprintf("> Out of probes: FAILED\n"); 37.550 + VarpEntry_error(ventry); 37.551 + ventry->state = VARP_STATE_FAILED; 37.552 + } 37.553 + } 37.554 + VarpEntry_set_flags(ventry, VARP_FLAG_PROBING, probing); 37.555 + if(locked) VarpEntry_unlock(ventry, flags); 37.556 + if(!probing) VarpEntry_decref(ventry); 37.557 + dprintf("<\n"); 37.558 +} 37.559 + 37.560 +/** Default error function for varp entries. 37.561 + * 37.562 + * @param ventry varp entry 37.563 + * @param skb packet dropped because of error 37.564 + */ 37.565 +static void varp_error_fn(VarpEntry *ventry, struct sk_buff *skb){ 37.566 +} 37.567 + 37.568 +/** Create a varp entry. Initializes the internal state. 
37.569 + * 37.570 + * @param vnet vnet id 37.571 + * @param vmac virtual MAC address (copied) 37.572 + * @return ventry or null 37.573 + */ 37.574 +VarpEntry * VarpEntry_new(u32 vnet, Vmac *vmac){ 37.575 + VarpEntry *z = ALLOCATE(VarpEntry); 37.576 + if(z){ 37.577 + unsigned long now = jiffies; 37.578 + 37.579 + atomic_set(&z->refcount, 1); 37.580 + z->lock = RW_LOCK_UNLOCKED; 37.581 + z->state = VARP_STATE_INCOMPLETE; 37.582 + z->queue_max = VARP_QUEUE_MAX; 37.583 + skb_queue_head_init(&z->queue); 37.584 + init_timer(&z->timer); 37.585 + z->timer.data = (unsigned long)z; 37.586 + z->timer.function = varp_timer_fn; 37.587 + z->timestamp = now; 37.588 + z->error = varp_error_fn; 37.589 + 37.590 + z->key.vnet = vnet; 37.591 + z->key.vmac = *vmac; 37.592 + } 37.593 + return z; 37.594 +} 37.595 + 37.596 +/** Hash function for keys in the varp cache. 37.597 + * Hashes the vnet id and mac. 37.598 + * 37.599 + * @param k key (VarpKey) 37.600 + * @return hashcode 37.601 + */ 37.602 +Hashcode varp_key_hash_fn(void *k){ 37.603 + VarpKey *key = k; 37.604 + Hashcode h; 37.605 + h = hash_2ul(key->vnet, 37.606 + (key->vmac.mac[0] << 24) | 37.607 + (key->vmac.mac[1] << 16) | 37.608 + (key->vmac.mac[2] << 8) | 37.609 + (key->vmac.mac[3] )); 37.610 + h = hash_hul(h, 37.611 + (key->vmac.mac[4] << 8) | 37.612 + (key->vmac.mac[5] )); 37.613 + return h; 37.614 +} 37.615 + 37.616 +/** Test equality for keys in the varp cache. 37.617 + * Compares vnet and mac. 37.618 + * 37.619 + * @param k1 key to compare (VarpKey) 37.620 + * @param k2 key to compare (VarpKey) 37.621 + * @return 1 if equal, 0 otherwise 37.622 + */ 37.623 +int varp_key_equal_fn(void *k1, void *k2){ 37.624 + VarpKey *key1 = k1; 37.625 + VarpKey *key2 = k2; 37.626 + return (key1->vnet == key2->vnet) 37.627 + && (memcmp(key1->vmac.mac, key2->vmac.mac, ETH_ALEN) == 0); 37.628 +} 37.629 + 37.630 +/** Free an entry in the varp cache. 
37.631 + * 37.632 + * @param table containing table 37.633 + * @param entry entry to free 37.634 + */ 37.635 +static void varp_entry_free_fn(HashTable *table, HTEntry *entry){ 37.636 + VarpEntry *ventry; 37.637 + if(!entry) return; 37.638 + ventry = entry->value; 37.639 + if(ventry) VarpEntry_decref(ventry); 37.640 + HTEntry_free(entry); 37.641 +} 37.642 + 37.643 +/** Free the whole varp cache. 37.644 + * Dangerous. 37.645 + * 37.646 + * @param z varp cache 37.647 + */ 37.648 +void VarpTable_free(VarpTable *z){ 37.649 + unsigned long flags; 37.650 + if(!z) return; 37.651 + VarpTable_write_lock(z, flags); 37.652 + del_timer(&z->timer); 37.653 + z->timer.data = 0; 37.654 + if(z->table) HashTable_free(z->table); 37.655 + VarpTable_write_unlock(z, flags); 37.656 + deallocate(z); 37.657 +} 37.658 + 37.659 +/** Schedule the varp table timer. 37.660 + * 37.661 + * @param z varp table 37.662 + */ 37.663 +void VarpTable_schedule(VarpTable *z){ 37.664 + unsigned long now = jiffies; 37.665 + z->timer.expires = now + VARP_ENTRY_TTL; 37.666 + add_timer(&z->timer); 37.667 +} 37.668 + 37.669 +/** Function called when the varp table timer goes off. 37.670 + * Sweeps old varp cache entries and reschedules itself. 37.671 + * 37.672 + * @param arg varp table 37.673 + */ 37.674 +static void varp_table_timer_fn(unsigned long arg){ 37.675 + VarpTable *z = (VarpTable *)arg; 37.676 + //dprintf("> z=%p\n", z); 37.677 + if(z){ 37.678 + VarpTable_sweep(z, 0); 37.679 + VarpTable_schedule(z); 37.680 + } 37.681 + //dprintf("<\n"); 37.682 +} 37.683 + 37.684 +/** Print a varp table. 
37.685 + * 37.686 + * @param z table 37.687 + */ 37.688 +void VarpTable_print(VarpTable *z){ 37.689 + HashTable_for_decl(entry); 37.690 + VarpEntry *ventry; 37.691 + unsigned long flags, vflags; 37.692 + 37.693 + //dprintf(">\n"); 37.694 + VarpTable_read_lock(z, flags); 37.695 + HashTable_for_each(entry, varp_table->table){ 37.696 + ventry = entry->value; 37.697 + VarpEntry_lock(ventry, vflags); 37.698 + VarpEntry_print(ventry); 37.699 + VarpEntry_unlock(ventry, vflags); 37.700 + } 37.701 + VarpTable_read_unlock(z, flags); 37.702 + //dprintf("<\n"); 37.703 +} 37.704 + 37.705 +/** Create a varp table. 37.706 + * 37.707 + * @return new table or null 37.708 + */ 37.709 +VarpTable * VarpTable_new(void){ 37.710 + int err = -ENOMEM; 37.711 + VarpTable *z = NULL; 37.712 + 37.713 + z = ALLOCATE(VarpTable); 37.714 + if(!z) goto exit; 37.715 + z->table = HashTable_new(VARP_TABLE_BUCKETS); 37.716 + if(!z->table) goto exit; 37.717 + z->table->key_equal_fn = varp_key_equal_fn; 37.718 + z->table->key_hash_fn = varp_key_hash_fn; 37.719 + z->table->entry_free_fn = varp_entry_free_fn; 37.720 + init_MUTEX(&z->lock); 37.721 + init_timer(&z->timer); 37.722 + z->timer.data = (unsigned long)z; 37.723 + z->timer.function = varp_table_timer_fn; 37.724 + VarpTable_schedule(z); 37.725 + err = 0; 37.726 + exit: 37.727 + if(err){ 37.728 + VarpTable_free(z); 37.729 + z = NULL; 37.730 + } 37.731 + return z; 37.732 +} 37.733 + 37.734 +/** Add a new entry to the varp table. 
37.735 + * 37.736 + * @param z table 37.737 + * @param vnet vnet id 37.738 + * @param vmac virtual MAC address (copied) 37.739 + * @return new entry or null 37.740 + */ 37.741 +VarpEntry * VarpTable_add(VarpTable *z, u32 vnet, Vmac *vmac){ 37.742 + int err = -ENOMEM; 37.743 + VarpEntry *ventry; 37.744 + HTEntry *entry; 37.745 + unsigned long flags; 37.746 + 37.747 + ventry = VarpEntry_new(vnet, vmac); 37.748 + if(!ventry) goto exit; 37.749 + //dprintf("> "); VarpEntry_print(ventry); 37.750 + VarpTable_write_lock(z, flags); 37.751 + entry = HashTable_add(z->table, ventry, ventry); 37.752 + VarpTable_write_unlock(z, flags); 37.753 + if(!entry) goto exit; 37.754 + VarpEntry_incref(ventry); 37.755 + err = 0; 37.756 + exit: 37.757 + if(err){ 37.758 + VarpEntry_free(ventry); 37.759 + ventry = NULL; 37.760 + } 37.761 + return ventry; 37.762 +} 37.763 + 37.764 +/** Remove an entry from the varp table. 37.765 + * 37.766 + * @param z table 37.767 + * @param ventry entry to remove 37.768 + * @return removed count 37.769 + */ 37.770 +int VarpTable_remove(VarpTable *z, VarpEntry *ventry){ 37.771 + return HashTable_remove(z->table, ventry); 37.772 +} 37.773 + 37.774 +/** Lookup an entry in the varp table. 37.775 + * 37.776 + * @param z table 37.777 + * @param vnet vnet id 37.778 + * @param vmac virtual MAC addres 37.779 + * @return entry found or null 37.780 + */ 37.781 +VarpEntry * VarpTable_lookup(VarpTable *z, u32 vnet, Vmac *vmac){ 37.782 + unsigned long flags; 37.783 + VarpKey key = { .vnet = vnet, .vmac = *vmac }; 37.784 + VarpEntry *ventry; 37.785 + VarpTable_read_lock(z, flags); 37.786 + ventry = HashTable_get(z->table, &key); 37.787 + VarpTable_read_unlock(z, flags); 37.788 + if(ventry) VarpEntry_incref(ventry); 37.789 + return ventry; 37.790 +} 37.791 + 37.792 +/** Handle output for a reachable ventry. 37.793 + * Send the skb using the tunnel to the care-of address. 
37.794 + * 37.795 + * @param ventry varp entry 37.796 + * @param skb skb to send 37.797 + * @return 0 on success, error code otherwise 37.798 + */ 37.799 +int VarpEntry_send(VarpEntry *ventry, struct sk_buff *skb){ 37.800 + int err = 0; 37.801 + unsigned long flags = 0; 37.802 + u32 addr; 37.803 + 37.804 + dprintf("> skb=%p\n", skb); 37.805 + addr = ventry->addr; 37.806 + VarpEntry_unlock(ventry, flags); 37.807 + err = vnet_tunnel_send(ventry->key.vnet, addr, skb); 37.808 + VarpEntry_lock(ventry, flags); 37.809 + dprintf("< err=%d\n", err); 37.810 + return err; 37.811 +} 37.812 + 37.813 +/** Handle output for a non-reachable ventry. Send messages to complete it. 37.814 + * If the entry is still incomplete, queue the skb, otherwise 37.815 + * send it. If the queue is full, dequeue and free an old skb to 37.816 + * make room for the new one. 37.817 + * 37.818 + * @param ventry varp entry 37.819 + * @param skb skb to send 37.820 + * @return 0 on success, error code otherwise 37.821 + */ 37.822 +int VarpEntry_resolve(VarpEntry *ventry, struct sk_buff *skb){ 37.823 + int err = 0; 37.824 + unsigned long flags = 0; 37.825 + 37.826 + dprintf("> skb=%p\n", skb); //VarpEntry_print(ventry); 37.827 + ventry->state = VARP_STATE_INCOMPLETE; 37.828 + atomic_set(&ventry->probes, 1); 37.829 + if(!VarpEntry_get_flags(ventry, VARP_FLAG_PROBING)){ 37.830 + VarpEntry_set_flags(ventry, VARP_FLAG_PROBING, 1); 37.831 + VarpEntry_incref(ventry); 37.832 + VarpEntry_schedule(ventry); 37.833 + } 37.834 + VarpEntry_unlock(ventry, flags); 37.835 + varp_solicit(skb, ventry->key.vnet); 37.836 + VarpEntry_lock(ventry, flags); 37.837 + 37.838 + if(ventry->state == VARP_STATE_INCOMPLETE){ 37.839 + if(skb_queue_len(&ventry->queue) >= ventry->queue_max){ 37.840 + struct sk_buff *oldskb; 37.841 + oldskb = ventry->queue.next; 37.842 + __skb_unlink(oldskb, &ventry->queue); 37.843 + dprintf("> purging skb=%p\n", oldskb); 37.844 + kfree_skb(oldskb); 37.845 + } 37.846 + __skb_queue_tail(&ventry->queue, 
skb); 37.847 + } else { 37.848 + err = VarpEntry_send(ventry, skb); 37.849 + } 37.850 + dprintf("< err=%d\n", err); 37.851 + return err; 37.852 +} 37.853 + 37.854 +/** Handle output for a ventry. Resolves the ventry 37.855 + * if necessary. 37.856 + * 37.857 + * @param ventry varp entry 37.858 + * @param skb skb to send 37.859 + * @return 0 on success, error code otherwise 37.860 + */ 37.861 +int VarpEntry_output(VarpEntry *ventry, struct sk_buff *skb){ 37.862 + int err = 0; 37.863 + 37.864 + switch(ventry->state){ 37.865 + case VARP_STATE_REACHABLE: 37.866 + err = VarpEntry_send(ventry, skb); 37.867 + break; 37.868 + default: 37.869 + err = VarpEntry_resolve(ventry, skb); 37.870 + break; 37.871 + } 37.872 + return err; 37.873 +} 37.874 + 37.875 +/** Process the output queue for a ventry. Sends the queued skbs if 37.876 + * the ventry is reachable, otherwise drops them. 37.877 + * 37.878 + * @param ventry varp entry 37.879 + */ 37.880 +void VarpEntry_process_queue(VarpEntry *ventry){ 37.881 + struct sk_buff *skb; 37.882 + for( ; ; ){ 37.883 + if(ventry->state != VARP_STATE_REACHABLE) break; 37.884 + skb = __skb_dequeue(&ventry->queue); 37.885 + if(!skb) break; 37.886 + VarpEntry_output(ventry, skb); 37.887 + } 37.888 + skb_queue_purge(&ventry->queue); 37.889 +} 37.890 + 37.891 +/** Update a ventry. Sets the address and state to those given 37.892 + * and sets the timestamp to 'now'. 
37.893 + * 37.894 + * @param ventry varp entry 37.895 + * @param addr care-of address 37.896 + * @param state state 37.897 + * @return 0 on success, error code otherwise 37.898 + */ 37.899 +int VarpEntry_update(VarpEntry *ventry, u32 addr, int state){ 37.900 + int err = 0; 37.901 + unsigned long now = jiffies; 37.902 + unsigned long flags; 37.903 + 37.904 + dprintf("> addr=" IPFMT " state=%d\n", NIPQUAD(addr), state); 37.905 + //VarpEntry_print(ventry); 37.906 + VarpEntry_lock(ventry, flags); 37.907 + if(VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT)) goto exit; 37.908 + ventry->addr = addr; 37.909 + ventry->timestamp = now; 37.910 + ventry->state = state; 37.911 + VarpEntry_process_queue(ventry); 37.912 + exit: 37.913 + //dprintf("> "); VarpEntry_print(ventry); 37.914 + VarpEntry_unlock(ventry, flags); 37.915 + dprintf("< err=%d\n", err); 37.916 + return err; 37.917 +} 37.918 + 37.919 +int VarpTable_update(VarpTable *z, int vnet, Vmac *vmac, u32 addr, 37.920 + int state, int force){ 37.921 + int err = 0; 37.922 + VarpEntry *ventry; 37.923 + 37.924 + dprintf("> vnet=%d mac=" MACFMT " addr=" IPFMT " state=%d force=%d\n", 37.925 + vnet, MAC6TUPLE(vmac->mac), NIPQUAD(addr), state, force); 37.926 + ventry = VarpTable_lookup(z, vnet, vmac); 37.927 + if(force && !ventry){ 37.928 + dprintf("> No entry, adding\n"); 37.929 + ventry = VarpTable_add(z, vnet, vmac); 37.930 + } 37.931 + if(ventry){ 37.932 + dprintf("> Updating\n"); 37.933 + err = VarpEntry_update(ventry, addr, state); 37.934 + VarpEntry_decref(ventry); 37.935 + } else { 37.936 + dprintf("> No entry found\n"); 37.937 + err = -ENOENT; 37.938 + } 37.939 + dprintf("< err=%d\n", err); 37.940 + return err; 37.941 +} 37.942 + 37.943 +/** Update the ventry corresponding to the given varp header. 
37.944 + * 37.945 + * @param z table 37.946 + * @param varph varp header 37.947 + * @param state state 37.948 + * @return 0 on success, -ENOENT if no entry found 37.949 + */ 37.950 +int VarpTable_update_entry(VarpTable *z, VarpHdr *varph, int state){ 37.951 + return VarpTable_update(z, ntohl(varph->vnet), &varph->vmac, varph->addr, state, 0); 37.952 +} 37.953 + 37.954 +int varp_update(int vnet, unsigned char *vmac, u32 addr){ 37.955 + if(!varp_table){ 37.956 + return -ENOSYS; 37.957 + } 37.958 + return VarpTable_update(varp_table, vnet, (Vmac*)vmac, addr, 37.959 + VARP_STATE_REACHABLE, 1); 37.960 +} 37.961 + 37.962 +/** Put old varp entries into the incomplete state. 37.963 + * Permanent entries are not changed. 37.964 + * If 'all' is non-zero, all non-permanent entries 37.965 + * are put into the incomplete state, regardless of age. 37.966 + * 37.967 + * @param z table 37.968 + * @param all reset all entries if non-zero 37.969 + */ 37.970 +void VarpTable_sweep(VarpTable *z, int all){ 37.971 + HashTable_for_decl(entry); 37.972 + VarpEntry *ventry; 37.973 + unsigned long now = jiffies; 37.974 + unsigned long old = now - VARP_ENTRY_TTL; 37.975 + unsigned long flags, vflags; 37.976 + 37.977 + //dprintf(">\n"); 37.978 + VarpTable_read_lock(z, flags); 37.979 + HashTable_for_each(entry, varp_table->table){ 37.980 + ventry = entry->value; 37.981 + VarpEntry_lock(ventry, vflags); 37.982 + if(!VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT) && 37.983 + (all || (ventry->timestamp < old))){ 37.984 + VarpEntry_process_queue(ventry); 37.985 + ventry->state = VARP_STATE_INCOMPLETE; 37.986 + } 37.987 + VarpEntry_unlock(ventry, vflags); 37.988 + } 37.989 + VarpTable_read_unlock(z, flags); 37.990 + //dprintf("<\n"); 37.991 +} 37.992 + 37.993 +/** Handle a varp request. Look for a vif with the requested 37.994 + * vnet and vmac. If find one, reply with the vnet, vmac and our 37.995 + * address. Otherwise do nothing. 
37.996 + * 37.997 + * @param skb incoming message 37.998 + * @param varph varp message 37.999 + * @return 0 if ok, -ENOENT if no matching vif, or error code 37.1000 + */ 37.1001 +int varp_handle_request(struct sk_buff *skb, VarpHdr *varph){ 37.1002 + int err = -ENOENT; 37.1003 + u32 vnet; 37.1004 + Vmac *vmac; 37.1005 + Vif *vif = NULL; 37.1006 + 37.1007 + dprintf(">\n"); 37.1008 + vnet = ntohl(varph->vnet); 37.1009 + vmac = &varph->vmac; 37.1010 + dprintf("> vnet=%d vmac=" MACFMT "\n", vnet, MAC6TUPLE(vmac->mac)); 37.1011 + if(vif_lookup(vnet, vmac, &vif)) goto exit; 37.1012 + varp_send(VARP_OP_ANNOUNCE, skb->dev, skb, vnet, vmac); 37.1013 + vif_decref(vif); 37.1014 + exit: 37.1015 + dprintf("< err=%d\n", err); 37.1016 + return err; 37.1017 +} 37.1018 + 37.1019 +/** Announce the vnet and vmac of a vif (gratuitous varp). 37.1020 + * 37.1021 + * @param dev device to send on (may be null) 37.1022 + * @param vif vif 37.1023 + * @return 0 on success, error code otherwise 37.1024 + */ 37.1025 +int varp_announce_vif(struct net_device *dev, Vif *vif){ 37.1026 + int err = 0; 37.1027 + dprintf(">\n"); 37.1028 + if(!varp_table){ 37.1029 + err = -ENOSYS; 37.1030 + goto exit; 37.1031 + } 37.1032 + err = varp_send(VARP_OP_ANNOUNCE, dev, NULL, vif->vnet, &vif->vmac); 37.1033 + exit: 37.1034 + dprintf("< err=%d\n", err); 37.1035 + return err; 37.1036 +} 37.1037 + 37.1038 +/** Handle a varp announce message. 37.1039 + * Update the matching ventry if we have one. 37.1040 + * 37.1041 + * @param skb incoming message 37.1042 + * @param varp message 37.1043 + * @return 0 if OK, -ENOENT if no matching entry 37.1044 + */ 37.1045 +int varp_handle_announce(struct sk_buff *skb, VarpHdr *varph){ 37.1046 + int err = 0; 37.1047 + 37.1048 + dprintf(">\n"); 37.1049 + err = VarpTable_update_entry(varp_table, varph, VARP_STATE_REACHABLE); 37.1050 + dprintf("< err=%d\n", err); 37.1051 + return err; 37.1052 +} 37.1053 + 37.1054 +/** Handle an incoming varp message. 
37.1055 + * 37.1056 + * @param skb incoming message 37.1057 + * @return 0 if OK, error code otherwise 37.1058 + */ 37.1059 +int varp_handle_message(struct sk_buff *skb){ 37.1060 + // Assume h. nh set, skb->data point after udp hdr (at varphdr). 37.1061 + int err = -EINVAL, mine = 0; 37.1062 + VarpHdr *varph = (void*)(skb->h.uh + 1); 37.1063 + 37.1064 + dprintf(">\n"); 37.1065 + if(!varp_table){ 37.1066 + err = -ENOSYS; 37.1067 + goto exit; 37.1068 + } 37.1069 + if(MULTICAST(skb->nh.iph->daddr) && 37.1070 + (skb->nh.iph->daddr != varp_mcast_addr)){ 37.1071 + // Ignore multicast packets not addressed to us. 37.1072 + err = 0; 37.1073 + dprintf("> daddr=" IPFMT " mcaddr=" IPFMT "\n", 37.1074 + NIPQUAD(skb->nh.iph->daddr), NIPQUAD(varp_mcast_addr)); 37.1075 + goto exit; 37.1076 + } 37.1077 + if(skb->len < sizeof(*varph)){ 37.1078 + wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(*varph)); 37.1079 + goto exit; 37.1080 + } 37.1081 + mine = 1; 37.1082 + if(varph->id != htons(VARP_ID)){ 37.1083 + // It's not varp at all - ignore it. 
37.1084 + wprintf("> Unknown id: %d \n", ntohs(varph->id)); 37.1085 + goto exit; 37.1086 + } 37.1087 + if(1){ 37.1088 + dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n", 37.1089 + NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr)); 37.1090 + dprintf("> sport=%u dport=%u\n", ntohs(skb->h.uh->source), ntohs(skb->h.uh->dest)); 37.1091 + dprintf("> opcode=%d vnet=%u vmac=" MACFMT " addr=" IPFMT "\n", 37.1092 + ntohs(varph->opcode), 37.1093 + ntohl(varph->vnet), 37.1094 + MAC6TUPLE(varph->vmac.mac), 37.1095 + NIPQUAD(varph->addr)); 37.1096 + varp_dprint(); 37.1097 + } 37.1098 + switch(ntohs(varph->opcode)){ 37.1099 + case VARP_OP_REQUEST: 37.1100 + err = varp_handle_request(skb, varph); 37.1101 + break; 37.1102 + case VARP_OP_ANNOUNCE: 37.1103 + err = varp_handle_announce(skb, varph); 37.1104 + break; 37.1105 + default: 37.1106 + wprintf("> Unknown opcode: %d \n", ntohs(varph->opcode)); 37.1107 + break; 37.1108 + } 37.1109 + exit: 37.1110 + if(mine) err = 1; 37.1111 + dprintf("< err=%d\n", err); 37.1112 + return err; 37.1113 +} 37.1114 + 37.1115 +/** Send an outgoing packet on the appropriate vnet tunnel. 
37.1116 + * 37.1117 + * @param skb outgoing message 37.1118 + * @param vnet vnet (host order) 37.1119 + * @return 0 on success, error code otherwise 37.1120 + */ 37.1121 +int varp_output(struct sk_buff *skb, u32 vnet){ 37.1122 + int err = 0; 37.1123 + unsigned char *mac = NULL; 37.1124 + Vmac *vmac = NULL; 37.1125 + VarpEntry *ventry = NULL; 37.1126 + 37.1127 + dprintf("> skb=%p vnet=%u\n", skb, vnet); 37.1128 + if(!varp_table){ 37.1129 + err = -ENOSYS; 37.1130 + goto exit; 37.1131 + } 37.1132 + dprintf("> skb.mac=%p\n", skb->mac.raw); 37.1133 + if(!skb->mac.raw){ 37.1134 + wprintf("> No ethhdr in skb!\n"); 37.1135 + err = -EINVAL; 37.1136 + goto exit; 37.1137 + } 37.1138 + mac = MAC_ETH(skb)->h_dest; 37.1139 + vmac = (Vmac*)mac; 37.1140 + if(mac_is_multicast(mac)){ 37.1141 + err = vnet_tunnel_send(vnet, varp_mcast_addr, skb); 37.1142 + } else { 37.1143 + ventry = VarpTable_lookup(varp_table, vnet, vmac); 37.1144 + if(!ventry){ 37.1145 + ventry = VarpTable_add(varp_table, vnet, vmac); 37.1146 + } 37.1147 + if(ventry){ 37.1148 + unsigned long flags; 37.1149 + VarpEntry_lock(ventry, flags); 37.1150 + err = VarpEntry_output(ventry, skb); 37.1151 + VarpEntry_unlock(ventry, flags); 37.1152 + VarpEntry_decref(ventry); 37.1153 + } else { 37.1154 + err = -ENOMEM; 37.1155 + } 37.1156 + } 37.1157 + exit: 37.1158 + dprintf("< err=%d\n", err); 37.1159 + return err; 37.1160 +} 37.1161 + 37.1162 +/** Set the varp multicast address (after initialization). 37.1163 + * 37.1164 + * @param addr address (network order) 37.1165 + * @return 0 on success, error code otherwise 37.1166 + */ 37.1167 +int varp_set_mcast_addr(uint32_t addr){ 37.1168 + int err = 0; 37.1169 + varp_close(); 37.1170 + varp_mcast_addr = addr; 37.1171 + err = varp_open(varp_mcast_addr, varp_ucast_addr, varp_port); 37.1172 + return err; 37.1173 +} 37.1174 + 37.1175 +/** Initialize the varp multicast address from a module parameter. 
37.1176 + * 37.1177 + * @param s address in IPv4 notation 37.1178 + * @return 0 on success, error code otherwise 37.1179 + */ 37.1180 +static void varp_init_mcast_addr(char *s){ 37.1181 + unsigned long v = 0; 37.1182 + 37.1183 + dprintf("> %s\n", s); 37.1184 + if(s && (get_inet_addr(s, &v) >= 0)){ 37.1185 + varp_mcast_addr = (u32)v; 37.1186 + } else { 37.1187 + varp_mcast_addr = htonl(VARP_MCAST_ADDR); 37.1188 + } 37.1189 +} 37.1190 + 37.1191 +/** Initialize the varp cache. 37.1192 + * 37.1193 + * @return 0 on success, error code otherwise 37.1194 + */ 37.1195 +int varp_init(void){ 37.1196 + int err = 0; 37.1197 + struct net_device *dev = NULL; 37.1198 + 37.1199 + dprintf(">\n"); 37.1200 + varp_table = VarpTable_new(); 37.1201 + if(!varp_table){ 37.1202 + err = -ENOMEM; 37.1203 + goto exit; 37.1204 + } 37.1205 + varp_init_mcast_addr(varp_mcaddr); 37.1206 + err = vnet_get_device(varp_device, &dev); 37.1207 + dprintf("> vnet_get_device(%s)=%d\n", varp_device, err); 37.1208 + if(err) goto exit; 37.1209 + err = vnet_get_device_address(dev, &varp_ucast_addr); 37.1210 + dprintf("> vnet_get_device_address()=%d\n", err); 37.1211 + if(err) goto exit; 37.1212 + varp_port = htons(VARP_PORT); 37.1213 + 37.1214 + err = varp_open(varp_mcast_addr, varp_ucast_addr, varp_port); 37.1215 + dprintf("> varp_open()=%d\n", err); 37.1216 + exit: 37.1217 + if(dev) dev_put(dev); 37.1218 + dprintf("< err=%d\n", err); 37.1219 + return err; 37.1220 +} 37.1221 + 37.1222 +/** Close the varp cache. 37.1223 + */ 37.1224 +void varp_exit(void){ 37.1225 + dprintf(">\n"); 37.1226 + varp_close(); 37.1227 + if(varp_table){ 37.1228 + VarpTable *z = varp_table; 37.1229 + varp_table = NULL; 37.1230 + VarpTable_free(z); 37.1231 + } 37.1232 + dprintf("<\n"); 37.1233 +} 37.1234 + 37.1235 +MODULE_PARM(varp_mcaddr, "s"); 37.1236 +MODULE_PARM_DESC(varp_mcaddr, "VARP multicast address"); 37.1237 + 37.1238 +MODULE_PARM(varp_device, "s"); 37.1239 +MODULE_PARM_DESC(varp_device, "VARP network device");
38.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 38.2 +++ b/tools/vnet/vnet-module/varp.h Mon Nov 22 16:49:15 2004 +0000 38.3 @@ -0,0 +1,144 @@ 38.4 +/* 38.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 38.6 + * 38.7 + * This program is free software; you can redistribute it and/or modify 38.8 + * it under the terms of the GNU General Public License as published by the 38.9 + * Free Software Foundation; either version 2 of the License, or (at your 38.10 + * option) any later version. 38.11 + * 38.12 + * This program is distributed in the hope that it will be useful, but 38.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 38.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 38.15 + * for more details. 38.16 + * 38.17 + * You should have received a copy of the GNU General Public License along 38.18 + * with this program; if not, write to the Free software Foundation, Inc., 38.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 38.20 + * 38.21 + */ 38.22 + 38.23 +#ifndef _VNET_VARP_H 38.24 +#define _VNET_VARP_H 38.25 + 38.26 +#define CONFIG_VARP_GRATUITOUS 1 38.27 + 38.28 +struct net_device; 38.29 +struct sk_buff; 38.30 +struct Vif; 38.31 + 38.32 +#define DEVICE "xen-br0" 38.33 + 38.34 +extern int vnet_get_device(const char *name, struct net_device **dev); 38.35 +extern int vnet_get_device_address(struct net_device *dev, u32 *addr); 38.36 + 38.37 +extern int varp_handle_message(struct sk_buff *skb); 38.38 +extern int varp_output(struct sk_buff *skb, u32 vnet); 38.39 +extern int varp_update(int vnet, unsigned char *vmac, u32 addr); 38.40 + 38.41 +extern int varp_init(void); 38.42 +extern void varp_exit(void); 38.43 + 38.44 +extern int varp_open(u32 mcaddr, u32 addr, u16 port); 38.45 +extern void varp_close(void); 38.46 +extern int varp_set_mcast_addr(u32 addr); 38.47 + 38.48 +extern void varp_print(void); 38.49 + 38.50 +extern int varp_announce_vif(struct net_device *dev, struct Vif *vif); 
38.51 +//extern int varp_announce_vifs(struct net_device *dev, struct task_struct *domain); 38.52 + 38.53 +extern u32 varp_mcast_addr; 38.54 + 38.55 + 38.56 +/* MAC broadcast addr is ff-ff-ff-ff-ff-ff (all 1's). 38.57 + * MAC multicast addr has low bit 1, i.e. 01-00-00-00-00-00. 38.58 + */ 38.59 + 38.60 +/** Test if a MAC address is a multicast or broadcast address. 38.61 + * 38.62 + * @param mac address 38.63 + * @return 1 if it is, 0 if not 38.64 + */ 38.65 +static inline int mac_is_multicast(u8 mac[ETH_ALEN]){ 38.66 + return mac[0] & 1; 38.67 +} 38.68 + 38.69 +/** Test if a MAC address is the broadcast address. 38.70 + * 38.71 + * @param mac address 38.72 + * @return 1 if it is, 0 if not 38.73 + */ 38.74 +static inline int mac_is_broadcast(u8 mac[ETH_ALEN]){ 38.75 + u8 mac_bcast_val[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 38.76 + return memcmp(mac, mac_bcast_val, ETH_ALEN) == 0; 38.77 +} 38.78 + 38.79 +/** Test if a MAC address is the all-zero address. 38.80 + * 38.81 + * @param mac address 38.82 + * @return 1 if it is, 0 if not 38.83 + */ 38.84 +static inline int mac_is_zero(u8 mac[ETH_ALEN]){ 38.85 + u8 mac_zero_val[ETH_ALEN] = {}; 38.86 + return memcmp(mac, mac_zero_val, ETH_ALEN) == 0; 38.87 +} 38.88 + 38.89 +/** Print format for a mac address. */ 38.90 +#define MACFMT "%02x:%02x:%02x:%02x:%02x:%02x" 38.91 + 38.92 +#define MAC6TUPLE(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5] 38.93 + 38.94 +/** Get the subnet defined by a netmask and addr. 38.95 + * 38.96 + * @param netmask subnet netmask 38.97 + * @param addr subnet address 38.98 + * @return subnet 38.99 + */ 38.100 +static inline u32 subnet_net(u32 netmask, u32 addr){ 38.101 + return netmask & addr; 38.102 +} 38.103 + 38.104 +/** Get the address within a subnet. 
38.105 + * 38.106 + * @param netmask subnet netmask 38.107 + * @param addr address 38.108 + * @return address within the subnet 38.109 + */ 38.110 +static inline u32 subnet_addr(u32 netmask, u32 addr){ 38.111 + return ~netmask & addr; 38.112 +} 38.113 + 38.114 +/** Get the broadcast address for a subnet. 38.115 + * 38.116 + * @param netmask subnet netmask 38.117 + * @param netaddr subnet address 38.118 + * @return subnet broadcast address 38.119 + */ 38.120 +static inline u32 subnet_broadcast_addr(u32 netmask, u32 netaddr){ 38.121 + return subnet_net(netmask, netaddr) | ~netmask; 38.122 +} 38.123 + 38.124 +/** Test if an address corresponds to a subnet broadcast. 38.125 + * True if the address within the subnet is all 1's (in binary). 38.126 + * (even if the address is not in the subnet). 38.127 + * 38.128 + * @param netmask subnet mask 38.129 + * @param add address 38.130 + * @return 1 if it does, 0 otherwise 38.131 + */ 38.132 +static inline int subnet_broadcast(u32 netmask, u32 addr){ 38.133 + return subnet_addr(netmask, INADDR_ANY) == subnet_addr(netmask, addr); 38.134 +} 38.135 + 38.136 +/** Test if an address is in a subnet. 38.137 + * 38.138 + * @param netmask subnet mask 38.139 + * @param netaddr subnet address 38.140 + * @param addr address 38.141 + * @return 1 if it is, 0 otherwise 38.142 + */ 38.143 +static inline int subnet_local(u32 netmask, u32 netaddr, u32 addr){ 38.144 + return subnet_net(netmask, netaddr) == subnet_net(netmask, addr); 38.145 +} 38.146 + 38.147 +#endif /* ! _VNET_VARP_H */
39.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 39.2 +++ b/tools/vnet/vnet-module/varp_socket.c Mon Nov 22 16:49:15 2004 +0000 39.3 @@ -0,0 +1,639 @@ 39.4 +/* 39.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 39.6 + * 39.7 + * This program is free software; you can redistribute it and/or modify 39.8 + * it under the terms of the GNU General Public License as published by the 39.9 + * Free Software Foundation; either version 2 of the License, or (at your 39.10 + * option) any later version. 39.11 + * 39.12 + * This program is distributed in the hope that it will be useful, but 39.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 39.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 39.15 + * for more details. 39.16 + * 39.17 + * You should have received a copy of the GNU General Public License along 39.18 + * with this program; if not, write to the Free software Foundation, Inc., 39.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 39.20 + * 39.21 + */ 39.22 +#include <linux/kernel.h> 39.23 +#include <linux/types.h> 39.24 +#include <linux/version.h> 39.25 + 39.26 +#include <asm/uaccess.h> 39.27 +#include <linux/net.h> 39.28 +#include <linux/in.h> 39.29 +#include <linux/sched.h> 39.30 +#include <linux/file.h> 39.31 +#include <linux/version.h> 39.32 +#include <linux/smp_lock.h> 39.33 +#include <net/sock.h> 39.34 + 39.35 +#include <if_varp.h> 39.36 +#include <varp.h> 39.37 + 39.38 +/* Get macros needed to define system calls as functions in the kernel. */ 39.39 +#define __KERNEL_SYSCALLS__ 39.40 +static int errno; 39.41 +#include <linux/unistd.h> 39.42 + 39.43 +#define MODULE_NAME "VARP" 39.44 +#define DEBUG 1 39.45 +#undef DEBUG 39.46 +#include "debug.h" 39.47 + 39.48 +// Compensate for struct sock fields having 'sk_' added 39.49 +// to them in 2.6. 
39.50 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 39.51 + 39.52 +#define SK_RECEIVE_QUEUE sk_receive_queue 39.53 +#define SK_SLEEP sk_sleep 39.54 + 39.55 +#else 39.56 + 39.57 +#define SK_RECEIVE_QUEUE receive_queue 39.58 +#define SK_SLEEP sleep 39.59 + 39.60 +#endif 39.61 + 39.62 +/** @file 39.63 + * Support for the VARP udp sockets. 39.64 + */ 39.65 + 39.66 +static inline mm_segment_t change_fs(mm_segment_t fs){ 39.67 + mm_segment_t oldfs = get_fs(); 39.68 + set_fs(fs); 39.69 + return oldfs; 39.70 +} 39.71 + 39.72 +/* Replicate the user-space socket API. 39.73 + * The parts we need anyway. 39.74 + */ 39.75 + 39.76 +/* Define the socketcall() syscall. 39.77 + * Multiplexes all the socket-related calls. 39.78 + * 39.79 + * @param call socket call id 39.80 + * @param args arguments (upto 6) 39.81 + * @return call-dependent value 39.82 + */ 39.83 +static inline _syscall2(int, socketcall, 39.84 + int, call, 39.85 + unsigned long *, args) 39.86 + 39.87 +int socket(int family, int type, int protocol){ 39.88 + unsigned long args[6]; 39.89 + 39.90 + args[0] = (unsigned long)family; 39.91 + args[1] = (unsigned long)type; 39.92 + args[2] = (unsigned long)protocol; 39.93 + return socketcall(SYS_SOCKET, args); 39.94 +} 39.95 + 39.96 +int bind(int fd, struct sockaddr *umyaddr, int addrlen){ 39.97 + unsigned long args[6]; 39.98 + 39.99 + args[0] = (unsigned long)fd; 39.100 + args[1] = (unsigned long)umyaddr; 39.101 + args[2] = (unsigned long)addrlen; 39.102 + return socketcall(SYS_BIND, args); 39.103 +} 39.104 + 39.105 +int connect(int fd, struct sockaddr *uservaddr, int addrlen){ 39.106 + unsigned long args[6]; 39.107 + 39.108 + args[0] = (unsigned long)fd; 39.109 + args[1] = (unsigned long)uservaddr; 39.110 + args[2] = (unsigned long)addrlen; 39.111 + return socketcall(SYS_CONNECT, args); 39.112 +} 39.113 + 39.114 +int sendto(int fd, void * buff, size_t len, 39.115 + unsigned flags, struct sockaddr *addr, 39.116 + int addr_len){ 39.117 + unsigned long args[6]; 39.118 + 
39.119 + args[0] = (unsigned long)fd; 39.120 + args[1] = (unsigned long)buff; 39.121 + args[2] = (unsigned long)len; 39.122 + args[3] = (unsigned long)flags; 39.123 + args[4] = (unsigned long)addr; 39.124 + args[5] = (unsigned long)addr_len; 39.125 + return socketcall(SYS_SENDTO, args); 39.126 +} 39.127 + 39.128 +int recvfrom(int fd, void * ubuf, size_t size, 39.129 + unsigned flags, struct sockaddr *addr, 39.130 + int *addr_len){ 39.131 + unsigned long args[6]; 39.132 + 39.133 + args[0] = (unsigned long)fd; 39.134 + args[1] = (unsigned long)ubuf; 39.135 + args[2] = (unsigned long)size; 39.136 + args[3] = (unsigned long)flags; 39.137 + args[4] = (unsigned long)addr; 39.138 + args[5] = (unsigned long)addr_len; 39.139 + return socketcall(SYS_RECVFROM, args); 39.140 +} 39.141 + 39.142 +int setsockopt(int fd, int level, int optname, void *optval, int optlen){ 39.143 + unsigned long args[6]; 39.144 + 39.145 + args[0] = (unsigned long)fd; 39.146 + args[1] = (unsigned long)level; 39.147 + args[2] = (unsigned long)optname; 39.148 + args[3] = (unsigned long)optval; 39.149 + args[4] = (unsigned long)optlen; 39.150 + return socketcall(SYS_SETSOCKOPT, args); 39.151 +} 39.152 + 39.153 +int getsockopt(int fd, int level, int optname, void *optval, int *optlen){ 39.154 + unsigned long args[6]; 39.155 + 39.156 + args[0] = (unsigned long)fd; 39.157 + args[1] = (unsigned long)level; 39.158 + args[2] = (unsigned long)optname; 39.159 + args[3] = (unsigned long)optval; 39.160 + args[4] = (unsigned long)optlen; 39.161 + return socketcall(SYS_GETSOCKOPT, args); 39.162 +} 39.163 + 39.164 +int shutdown(int fd, int how){ 39.165 + unsigned long args[6]; 39.166 + 39.167 + args[0] = (unsigned long)fd; 39.168 + args[1] = (unsigned long)how; 39.169 + return socketcall(SYS_SHUTDOWN, args); 39.170 +} 39.171 + 39.172 +int getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len){ 39.173 + unsigned long args[6]; 39.174 + 39.175 + args[0] = (unsigned long)fd; 39.176 + args[1] = (unsigned 
long)usockaddr; 39.177 + args[2] = (unsigned long)usockaddr_len; 39.178 + return socketcall(SYS_GETSOCKNAME, args); 39.179 +} 39.180 + 39.181 +/*============================================================================*/ 39.182 +/** Socket flags. */ 39.183 +enum { 39.184 + VSOCK_REUSE = 1, 39.185 + VSOCK_BIND = 2, 39.186 + VSOCK_CONNECT = 4, 39.187 + VSOCK_BROADCAST = 8, 39.188 + VSOCK_MULTICAST = 16, 39.189 + }; 39.190 + 39.191 +/** Convert socket flags to a string. 39.192 + * 39.193 + * @param flags flags 39.194 + * @return static string 39.195 + */ 39.196 +char * socket_flags(int flags){ 39.197 + static char s[6]; 39.198 + int i = 0; 39.199 + s[i++] = (flags & VSOCK_CONNECT ? 'c' : '-'); 39.200 + s[i++] = (flags & VSOCK_BIND ? 'b' : '-'); 39.201 + s[i++] = (flags & VSOCK_REUSE ? 'r' : '-'); 39.202 + s[i++] = (flags & VSOCK_BROADCAST ? 'B' : '-'); 39.203 + s[i++] = (flags & VSOCK_MULTICAST ? 'M' : '-'); 39.204 + s[i++] = '\0'; 39.205 + return s; 39.206 +} 39.207 + 39.208 +/** The varp multicast socket. */ 39.209 +int varp_mcast_sock = -1; 39.210 + 39.211 +/** The varp unicast socket. */ 39.212 +int varp_ucast_sock = -1; 39.213 + 39.214 +/** Control flag for whether varp should be running. 39.215 + * If this is set 0 then the varp thread will notice and 39.216 + * (eventually) exit. This is indicated by setting varp_running 39.217 + * to 0. 39.218 + */ 39.219 +atomic_t varp_run = ATOMIC_INIT(0); 39.220 + 39.221 +/** State flag indicating whether the varp thread is running. */ 39.222 +atomic_t varp_running = ATOMIC_INIT(0); 39.223 + 39.224 +/** Set socket option to reuse address. 
39.225 + * 39.226 + * @param sock socket 39.227 + * @param reuse flag 39.228 + * @return 0 on success, error code otherwise 39.229 + */ 39.230 +int setsock_reuse(int sock, int reuse){ 39.231 + int err = 0; 39.232 + err = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)); 39.233 + if(err < 0){ 39.234 + eprintf("> setsockopt SO_REUSEADDR: %d %d\n", err, errno); 39.235 + } 39.236 + return err; 39.237 +} 39.238 + 39.239 +/** Set socket broadcast option. 39.240 + * 39.241 + * @param sock socket 39.242 + * @param bcast flag 39.243 + * @return 0 on success, error code otherwise 39.244 + */ 39.245 +int setsock_broadcast(int sock, int bcast){ 39.246 + int err = 0; 39.247 + err = setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &bcast, sizeof(bcast)); 39.248 + if(err < 0){ 39.249 + eprintf("> setsockopt SO_BROADCAST: %d %d\n", err, errno); 39.250 + } 39.251 + return err; 39.252 +} 39.253 + 39.254 +/** Join a socket to a multicast group. 39.255 + * 39.256 + * @param sock socket 39.257 + * @param saddr multicast address 39.258 + * @return 0 on success, error code otherwise 39.259 + */ 39.260 +int setsock_multicast(int sock, uint32_t saddr){ 39.261 + int err = 0; 39.262 + struct net_device *dev = NULL; 39.263 + u32 addr = 0; 39.264 + struct ip_mreqn mreq = {}; 39.265 + int mloop = 0; 39.266 + 39.267 + err = vnet_get_device(DEVICE, &dev); 39.268 + if(err){ 39.269 + eprintf("> error getting device: %d %d\n", err, errno); 39.270 + goto exit; 39.271 + } 39.272 + err = vnet_get_device_address(dev, &addr); 39.273 + if(err){ 39.274 + eprintf("> error getting device address: %d %d\n", err, errno); 39.275 + goto exit; 39.276 + } 39.277 + // See 'man 7 ip' for these options. 39.278 + mreq.imr_multiaddr.s_addr = saddr; // IP multicast address. 39.279 + //mreq.imr_address.s_addr = addr; // Interface IP address. 39.280 + mreq.imr_address.s_addr = INADDR_ANY; // Interface IP address. 39.281 + mreq.imr_ifindex = 0; // Interface index (0 means any). 
39.282 + dprintf("> saddr=%u.%u.%u.%u addr=%u.%u.%u.%u ifindex=%d\n", 39.283 + NIPQUAD(saddr), NIPQUAD(addr), mreq.imr_ifindex); 39.284 + err = setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &mloop, sizeof(mloop)); 39.285 + if(err < 0){ 39.286 + eprintf("> setsockopt IP_MULTICAST_LOOP: %d %d\n", err, errno); 39.287 + goto exit; 39.288 + } 39.289 + err = setsockopt(sock, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); 39.290 + if(err < 0){ 39.291 + eprintf("> setsockopt IP_ADD_MEMBERSHIP: %d %d\n", err, errno); 39.292 + goto exit; 39.293 + } 39.294 + exit: 39.295 + err = 0; //todo: remove hack 39.296 + return err; 39.297 +} 39.298 + 39.299 +/** Set a socket's multicast ttl (default is 1). 39.300 + * @param sock socket 39.301 + * @param ttl ttl 39.302 + * @return 0 on success, error code otherwise 39.303 + */ 39.304 +int setsock_multicast_ttl(int sock, uint8_t ttl){ 39.305 + int err = 0; 39.306 + err = setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); 39.307 + return err; 39.308 +} 39.309 + 39.310 +/** Create a socket. 39.311 + * The flags can include VSOCK_REUSE, VSOCK_BROADCAST, VSOCK_CONNECT. 
39.312 + * 39.313 + * @param socktype socket type 39.314 + * @param saddr address 39.315 + * @param port port 39.316 + * @param flags flags 39.317 + * @param val return value for the socket connection 39.318 + * @return 0 on success, error code otherwise 39.319 + */ 39.320 +int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, int *val){ 39.321 + int err = 0; 39.322 + int sock; 39.323 + struct sockaddr_in addr_in; 39.324 + struct sockaddr *addr = (struct sockaddr *)&addr_in; 39.325 + int addr_n = sizeof(addr_in); 39.326 + int reuse, bcast; 39.327 + int sockproto = 0; 39.328 + 39.329 + //dprintf(">\n"); 39.330 + reuse = (flags & VSOCK_REUSE); 39.331 + bcast = (flags & VSOCK_BROADCAST); 39.332 + addr_in.sin_family = AF_INET; 39.333 + addr_in.sin_addr.s_addr = saddr; 39.334 + addr_in.sin_port = port; 39.335 + dprintf("> flags=%s addr=%u.%u.%u.%u port=%d\n", 39.336 + socket_flags(flags), 39.337 + NIPQUAD(saddr), ntohs(port)); 39.338 + 39.339 + switch(socktype){ 39.340 + case SOCK_DGRAM: sockproto = IPPROTO_UDP; break; 39.341 + case SOCK_STREAM: sockproto = IPPROTO_TCP; break; 39.342 + } 39.343 + sock = socket(AF_INET, socktype, sockproto); 39.344 + if(sock < 0) goto exit; 39.345 + if(reuse){ 39.346 + err = setsock_reuse(sock, reuse); 39.347 + if(err < 0) goto exit; 39.348 + } 39.349 + if(bcast){ 39.350 + err = setsock_broadcast(sock, bcast); 39.351 + if(err < 0) goto exit; 39.352 + } 39.353 + if(flags & VSOCK_MULTICAST){ 39.354 + err = setsock_multicast(sock, saddr); 39.355 + if(err < 0) goto exit; 39.356 + } 39.357 + if(flags & VSOCK_CONNECT){ 39.358 + err = connect(sock, addr, addr_n); 39.359 + if(err < 0) goto exit; 39.360 + } 39.361 + if(flags & VSOCK_BIND){ 39.362 + err = bind(sock, addr, addr_n); 39.363 + if(err < 0) goto exit; 39.364 + } 39.365 + exit: 39.366 + *val = (err ? -1 : sock); 39.367 + if(err) eprintf("> err=%d errno=%d\n", err, errno); 39.368 + return err; 39.369 +} 39.370 + 39.371 +/** Open the varp multicast socket. 
39.372 + * 39.373 + * @param mcaddr multicast address 39.374 + * @param saddr address 39.375 + * @param port port 39.376 + * @param val return parameter for the socket 39.377 + * @return 0 on success, error code otherwise 39.378 + */ 39.379 +int varp_mcast_open(uint32_t mcaddr, uint32_t saddr, uint16_t port, int *val){ 39.380 + int err = 0; 39.381 + int flags = VSOCK_REUSE; 39.382 + int multicast = MULTICAST(mcaddr); 39.383 + int sock = 0; 39.384 + struct sockaddr_in addr_in; 39.385 + struct sockaddr *addr = (struct sockaddr *)&addr_in; 39.386 + int addr_n = sizeof(addr_in); 39.387 + 39.388 + dprintf(">\n"); 39.389 + flags |= VSOCK_MULTICAST; 39.390 + flags |= VSOCK_BROADCAST; 39.391 + 39.392 + err = create_socket(SOCK_DGRAM, mcaddr, port, flags, &sock); 39.393 + if(err < 0) goto exit; 39.394 + if(multicast){ 39.395 + err = setsock_multicast_ttl(sock, 1); 39.396 + if(err < 0) goto exit; 39.397 + } 39.398 + if(0){ 39.399 + addr_in.sin_family = AF_INET; 39.400 + addr_in.sin_addr.s_addr = saddr; 39.401 + addr_in.sin_port = port; 39.402 + err = bind(sock, addr, addr_n); 39.403 + if(err < 0){ 39.404 + eprintf("> bind: %d %d\n", err, errno); 39.405 + goto exit; 39.406 + } 39.407 + } 39.408 + if(0){ 39.409 + struct sockaddr_in self = {}; 39.410 + int self_n; 39.411 + getsockname(sock, (struct sockaddr *)&self, &self_n); 39.412 + dprintf("> sockname sock=%d addr=%u.%u.%u.%u port=%d\n", 39.413 + sock, NIPQUAD(saddr), ntohs(port)); 39.414 + } 39.415 + exit: 39.416 + if(err){ 39.417 + shutdown(sock, 2); 39.418 + } 39.419 + *val = (err ? -1 : sock); 39.420 + dprintf("< err=%d val=%d\n", err, *val); 39.421 + return err; 39.422 +} 39.423 + 39.424 +/** Open the varp unicast socket. 
39.425 + * 39.426 + * @param addr address 39.427 + * @param port port 39.428 + * @param val return parameter for the socket 39.429 + * @return 0 on success, error code otherwise 39.430 + */ 39.431 +int varp_ucast_open(uint32_t addr, u16 port, int *val){ 39.432 + int err = 0; 39.433 + int flags = VSOCK_BIND | VSOCK_REUSE; 39.434 + dprintf(">\n"); 39.435 + err = create_socket(SOCK_DGRAM, addr, port, flags, val); 39.436 + dprintf("< err=%d val=%d\n", err, *val); 39.437 + return err; 39.438 +} 39.439 + 39.440 +/* Here because inline in 'socket.c'. */ 39.441 +#ifndef sockfd_put 39.442 +#define sockfd_put(sock) fput((sock)->file) 39.443 +#endif 39.444 + 39.445 +/** Get the next skb from a socket's receive queue. 39.446 + * 39.447 + * @param fd socket file descriptor 39.448 + * @return skb or NULL 39.449 + */ 39.450 +static struct sk_buff *get_sock_skb(int fd){ 39.451 + int err = 0; 39.452 + struct sk_buff *skb = NULL; 39.453 + struct socket *sock = NULL; 39.454 + 39.455 + sock = sockfd_lookup(fd, &err); 39.456 + if (!sock){ 39.457 + dprintf("> no sock for fd=%d\n", fd); 39.458 + goto exit; 39.459 + } 39.460 + skb = skb_dequeue(&sock->sk->SK_RECEIVE_QUEUE); 39.461 + //skb = skb_recv_datagram(sock->sk, 0, 1, &recv_err); 39.462 + sockfd_put(sock); 39.463 + exit: 39.464 + return skb; 39.465 +} 39.466 + 39.467 +/** Handle the next skb on a socket (if any). 39.468 + * 39.469 + * @param fd socket file descriptor 39.470 + * @return 1 if there was an skb, 0 otherwise 39.471 + */ 39.472 +static int handle_sock_skb(int fd){ 39.473 + int ret = 0; 39.474 + struct sk_buff *skb = get_sock_skb(fd); 39.475 + if(skb){ 39.476 + ret = 1; 39.477 + dprintf("> skb fd=%d skb=%p\n", fd, skb); 39.478 + varp_handle_message(skb); 39.479 + kfree_skb(skb); 39.480 + } 39.481 + return ret; 39.482 +} 39.483 + 39.484 +/** Add a wait queue to a socket. 
39.485 + * 39.486 + * @param fd socket file descriptor 39.487 + * @param waitq queue 39.488 + * @return 0 on success, error code otherwise 39.489 + */ 39.490 +int sock_add_wait_queue(int fd, wait_queue_t *waitq){ 39.491 + int err = 0; 39.492 + struct socket *sock = NULL; 39.493 + 39.494 + dprintf("> fd=%d\n", fd); 39.495 + sock = sockfd_lookup(fd, &err); 39.496 + if (!sock) goto exit; 39.497 + add_wait_queue(sock->sk->SK_SLEEP, waitq); 39.498 + sockfd_put(sock); 39.499 + exit: 39.500 + dprintf("< err=%d\n", err); 39.501 + return err; 39.502 +} 39.503 + 39.504 +/** Remove a wait queue from a socket. 39.505 + * 39.506 + * @param fd socket file descriptor 39.507 + * @param waitq queue 39.508 + * @return 0 on success, error code otherwise 39.509 + */ 39.510 +int sock_remove_wait_queue(int fd, wait_queue_t *waitq){ 39.511 + int err = 0; 39.512 + struct socket *sock = NULL; 39.513 + 39.514 + sock = sockfd_lookup(fd, &err); 39.515 + if (!sock) goto exit; 39.516 + remove_wait_queue(sock->sk->SK_SLEEP, waitq); 39.517 + sockfd_put(sock); 39.518 + exit: 39.519 + return err; 39.520 +} 39.521 + 39.522 +/** Loop handling the varp sockets. 39.523 + * We use kernel API for this (waitqueue, schedule_timeout) instead 39.524 + * of select because the select syscall was returning EFAULT. Oh well. 
39.525 + * 39.526 + * @param arg arguments 39.527 + * @return exit code 39.528 + */ 39.529 +int varp_main(void *arg){ 39.530 + int err = 0; 39.531 + long timeout = 3 * HZ; 39.532 + int count = 0; 39.533 + int n = 0; 39.534 + DECLARE_WAITQUEUE(mcast_wait, current); 39.535 + DECLARE_WAITQUEUE(ucast_wait, current); 39.536 + 39.537 + dprintf("> start\n"); 39.538 + atomic_set(&varp_running, 1); 39.539 + err = sock_add_wait_queue(varp_mcast_sock, &mcast_wait); 39.540 + err = sock_add_wait_queue(varp_ucast_sock, &ucast_wait); 39.541 + for(n = 1; atomic_read(&varp_run) == 1; n++){ 39.542 + //dprintf("> n=%d\n", n); 39.543 + count = 0; 39.544 + count += handle_sock_skb(varp_mcast_sock); 39.545 + count += handle_sock_skb(varp_ucast_sock); 39.546 + if(!count){ 39.547 + // No skbs were handled, so go back to sleep. 39.548 + set_current_state(TASK_INTERRUPTIBLE); 39.549 + schedule_timeout(timeout); 39.550 + current->state = TASK_RUNNING; 39.551 + } 39.552 + } 39.553 + sock_remove_wait_queue(varp_mcast_sock, &mcast_wait); 39.554 + sock_remove_wait_queue(varp_ucast_sock, &ucast_wait); 39.555 + atomic_set(&varp_running, 0); 39.556 + //MOD_DEC_USE_COUNT; 39.557 + dprintf("< stop err=%d\n", err); 39.558 + return err; 39.559 +} 39.560 + 39.561 +/** Start the varp thread. 39.562 + * 39.563 + * @return 0 on success, error code otherwise 39.564 + */ 39.565 +int varp_start(void){ 39.566 + int err = 0; 39.567 + void *args = NULL; 39.568 + int flags = 0; 39.569 + long pid = 0; 39.570 + 39.571 + dprintf(">\n"); 39.572 + //flags |= CLONE_VM; 39.573 + flags |= CLONE_FS; 39.574 + flags |= CLONE_FILES; 39.575 + flags |= CLONE_SIGHAND; 39.576 + atomic_set(&varp_run, 1); 39.577 + atomic_set(&varp_running, 0); 39.578 + pid = kernel_thread(varp_main, args, flags); 39.579 + dprintf("< pid=%ld\n", pid); 39.580 + return err; 39.581 +} 39.582 + 39.583 +/** Close the varp sockets and stop the thread handling them. 
39.584 + */ 39.585 +void varp_close(void){ 39.586 + mm_segment_t oldfs; 39.587 + long timeout = 1 * HZ; 39.588 + int tries = 10; 39.589 + dprintf(">\n"); 39.590 + // Tell the varp thread to stop and wait a while for it. 39.591 + atomic_set(&varp_run, 0); 39.592 + while(atomic_read(&varp_running) && tries-- > 0){ 39.593 + set_current_state(TASK_INTERRUPTIBLE); 39.594 + schedule_timeout(timeout); 39.595 + current->state = TASK_RUNNING; 39.596 + } 39.597 + // Close the sockets. 39.598 + oldfs = change_fs(KERNEL_DS); 39.599 + if(varp_mcast_sock > 0){ 39.600 + shutdown(varp_mcast_sock, 2); 39.601 + varp_mcast_sock = -1; 39.602 + } 39.603 + if(varp_ucast_sock > 0){ 39.604 + shutdown(varp_ucast_sock, 2); 39.605 + varp_ucast_sock = -1; 39.606 + } 39.607 + set_fs(oldfs); 39.608 + //MOD_DEC_USE_COUNT; 39.609 + dprintf("<\n"); 39.610 +} 39.611 + 39.612 +/** Open the varp sockets and start the thread handling them. 39.613 + * 39.614 + * @param mcaddr multicast address 39.615 + * @param addr unicast address 39.616 + * @param port port 39.617 + * @return 0 on success, error code otherwise 39.618 + */ 39.619 +int varp_open(u32 mcaddr, u32 addr, u16 port){ 39.620 + int err = 0; 39.621 + mm_segment_t oldfs; 39.622 + 39.623 + //MOD_INC_USE_COUNT; 39.624 + dprintf("> mcaddr=%u.%u.%u.%u addr=%u.%u.%u.%u port=%u\n", 39.625 + NIPQUAD(mcaddr), NIPQUAD(addr), ntohs(port)); 39.626 + //MOD_INC_USE_COUNT; 39.627 + oldfs = change_fs(KERNEL_DS); 39.628 + err = varp_mcast_open(mcaddr, addr, port, &varp_mcast_sock); 39.629 + if(err < 0 ) goto exit; 39.630 + err = varp_ucast_open(INADDR_ANY, port, &varp_ucast_sock); 39.631 + if(err < 0 ) goto exit; 39.632 + set_fs(oldfs); 39.633 + err = varp_start(); 39.634 + exit: 39.635 + set_fs(oldfs); 39.636 + if(err){ 39.637 + varp_close(); 39.638 + } 39.639 + dprintf("< err=%d\n", err); 39.640 + return err; 39.641 +} 39.642 +
40.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 40.2 +++ b/tools/vnet/vnet-module/vif.c Mon Nov 22 16:49:15 2004 +0000 40.3 @@ -0,0 +1,267 @@ 40.4 +/* 40.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 40.6 + * 40.7 + * This program is free software; you can redistribute it and/or modify 40.8 + * it under the terms of the GNU General Public License as published by the 40.9 + * Free Software Foundation; either version 2 of the License, or (at your 40.10 + * option) any later version. 40.11 + * 40.12 + * This program is distributed in the hope that it will be useful, but 40.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 40.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 40.15 + * for more details. 40.16 + * 40.17 + * You should have received a copy of the GNU General Public License along 40.18 + * with this program; if not, write to the Free software Foundation, Inc., 40.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 40.20 + * 40.21 + */ 40.22 + 40.23 +#include <linux/config.h> 40.24 +#include <linux/kernel.h> 40.25 +#include <linux/module.h> 40.26 +#include <linux/init.h> 40.27 +#include <linux/string.h> 40.28 + 40.29 +#include <linux/net.h> 40.30 +#include <linux/in.h> 40.31 +#include <linux/inet.h> 40.32 +#include <linux/netdevice.h> 40.33 +#include <linux/udp.h> 40.34 + 40.35 +#include <net/ip.h> 40.36 +#include <net/protocol.h> 40.37 +#include <net/route.h> 40.38 +#include <linux/skbuff.h> 40.39 + 40.40 +#include <etherip.h> 40.41 +#include <if_varp.h> 40.42 +#include <vnet_dev.h> 40.43 +#include <vif.h> 40.44 +#include "allocate.h" 40.45 +#include "hash_table.h" 40.46 +#include "sys_net.h" 40.47 +#include "sys_string.h" 40.48 + 40.49 +#define MODULE_NAME "VNET" 40.50 +#define DEBUG 1 40.51 +#undef DEBUG 40.52 +#include "debug.h" 40.53 + 40.54 +/** Table of vifs indexed by VifKey. 
*/ 40.55 +HashTable *vif_table = NULL; 40.56 + 40.57 +void vif_decref(Vif *vif){ 40.58 + if(!vif) return; 40.59 + if(atomic_dec_and_test(&vif->refcount)){ 40.60 + kfree(vif); 40.61 + } 40.62 +} 40.63 + 40.64 +void vif_incref(Vif *vif){ 40.65 + if(!vif) return; 40.66 + atomic_inc(&vif->refcount); 40.67 +} 40.68 + 40.69 +/** Hash function for keys in the vif table. 40.70 + * Hashes the vnet id and mac. 40.71 + * 40.72 + * @param k key (VifKey) 40.73 + * @return hashcode 40.74 + */ 40.75 +Hashcode vif_key_hash_fn(void *k){ 40.76 + VifKey *key = k; 40.77 + Hashcode h; 40.78 + h = hash_2ul(key->vnet, 40.79 + (key->vmac.mac[0] << 24) | 40.80 + (key->vmac.mac[1] << 16) | 40.81 + (key->vmac.mac[2] << 8) | 40.82 + (key->vmac.mac[3] )); 40.83 + h = hash_hul(h, 40.84 + (key->vmac.mac[4] << 8) | 40.85 + (key->vmac.mac[5] )); 40.86 + return h; 40.87 +} 40.88 + 40.89 + 40.90 +/** Test equality for keys in the vif table. 40.91 + * Compares vnet and mac. 40.92 + * 40.93 + * @param k1 key to compare (VifKey) 40.94 + * @param k2 key to compare (VifKey) 40.95 + * @return 1 if equal, 0 otherwise 40.96 + */ 40.97 +int vif_key_equal_fn(void *k1, void *k2){ 40.98 + VifKey *key1 = k1; 40.99 + VifKey *key2 = k2; 40.100 + return (key1->vnet == key2->vnet) && (memcmp(key1->vmac.mac, key2->vmac.mac, ETH_ALEN) == 0); 40.101 +} 40.102 + 40.103 +/** Free an entry in the vif table. 40.104 + * 40.105 + * @param table containing table 40.106 + * @param entry entry to free 40.107 + */ 40.108 +static void vif_entry_free_fn(HashTable *table, HTEntry *entry){ 40.109 + Vif *vif; 40.110 + if(!entry) return; 40.111 + vif = entry->value; 40.112 + if(vif){ 40.113 + vif_decref(vif); 40.114 + } 40.115 + HTEntry_free(entry); 40.116 +} 40.117 + 40.118 +/** Lookup a vif. 
40.119 + * 40.120 + * @param vnet vnet id 40.121 + * @param mac MAC address 40.122 + * @return 0 on success, -ENOENT otherwise 40.123 + */ 40.124 +int vif_lookup(int vnet, Vmac *vmac, Vif **vif){ 40.125 + int err = 0; 40.126 + VifKey key = {}; 40.127 + HTEntry *entry = NULL; 40.128 + 40.129 + key.vnet = vnet; 40.130 + key.vmac = *vmac; 40.131 + entry = HashTable_get_entry(vif_table, &key); 40.132 + if(entry){ 40.133 + *vif = entry->value; 40.134 + vif_incref(*vif); 40.135 + } else { 40.136 + *vif = NULL; 40.137 + err = -ENOENT; 40.138 + } 40.139 + //dprintf("< err=%d addr=" IPFMT "\n", err, NIPQUAD(*coaddr)); 40.140 + return err; 40.141 +} 40.142 + 40.143 +/** Create a new vif. 40.144 + * 40.145 + * @param vnet vnet id 40.146 + * @param mac MAC address 40.147 + * @return 0 on success, negative error code otherwise 40.148 + */ 40.149 +int vif_add(int vnet, Vmac *vmac, Vif **val){ 40.150 + int err = 0; 40.151 + Vif *vif = NULL; 40.152 + HTEntry *entry; 40.153 + dprintf("> vnet=%d\n", vnet); 40.154 + vif = ALLOCATE(Vif); 40.155 + if(!vif){ 40.156 + err = -ENOMEM; 40.157 + goto exit; 40.158 + } 40.159 + atomic_set(&vif->refcount, 1); 40.160 + vif->vnet = vnet; 40.161 + vif->vmac = *vmac; 40.162 + entry = HashTable_add(vif_table, vif, vif); 40.163 + if(!entry){ 40.164 + err = -ENOMEM; 40.165 + deallocate(vif); 40.166 + vif = NULL; 40.167 + goto exit; 40.168 + } 40.169 + vif_incref(vif); 40.170 + exit: 40.171 + *val = (err ? NULL : vif); 40.172 + dprintf("< err=%d\n", err); 40.173 + return err; 40.174 +} 40.175 + 40.176 +/** Delete an entry. 
40.177 + * 40.178 + * @param vnet vnet id 40.179 + * @param mac MAC address 40.180 + * @param coaddr return parameter for care-of address 40.181 + * @return number of entries deleted, or negative error code 40.182 + */ 40.183 +int vif_remove(int vnet, Vmac *vmac){ 40.184 + int err = 0; 40.185 + VifKey key = { .vnet = vnet, .vmac = *vmac }; 40.186 + //dprintf("> vnet=%d addr=%u.%u.%u.%u\n", vnet, NIPQUAD(coaddr)); 40.187 + err = HashTable_remove(vif_table, &key); 40.188 + //dprintf("< err=%d\n", err); 40.189 + return err; 40.190 +} 40.191 + 40.192 +int vif_find(int vnet, Vmac *vmac, int create, Vif **vif){ 40.193 + int err = 0; 40.194 + 40.195 + err = vif_lookup(vnet, vmac, vif); 40.196 + if(err && create){ 40.197 + err = vif_add(vnet, vmac, vif); 40.198 + } 40.199 + return err; 40.200 +} 40.201 + 40.202 +void vif_purge(void){ 40.203 + HashTable_clear(vif_table); 40.204 +} 40.205 + 40.206 +int vif_create(int vnet, Vmac *vmac, Vif **vif){ 40.207 + int err = 0; 40.208 + 40.209 + dprintf(">\n"); 40.210 + if(!vif_lookup(vnet, vmac, vif)){ 40.211 + err = -EEXIST; 40.212 + goto exit; 40.213 + } 40.214 + dprintf("> vif_add...\n"); 40.215 + err = vif_add(vnet, vmac, vif); 40.216 + exit: 40.217 + if(err){ 40.218 + *vif = NULL; 40.219 + } 40.220 + dprintf("< err=%d\n", err); 40.221 + return err; 40.222 +} 40.223 + 40.224 +/** Create a vif. 40.225 + * 40.226 + * @param vnet vnet id 40.227 + * @param mac mac address (as a string) 40.228 + * @return 0 on success, error code otherwise 40.229 + */ 40.230 +int mkvif(int vnet, char *mac){ 40.231 + int err = 0; 40.232 + Vmac vmac = {}; 40.233 + Vif *vif = NULL; 40.234 + dprintf("> vnet=%d mac=%s\n", vnet, mac); 40.235 + err = mac_aton(mac, vmac.mac); 40.236 + if(err) goto exit; 40.237 + err = vif_create(vnet, &vmac, &vif); 40.238 + exit: 40.239 + dprintf("< err=%d\n", err); 40.240 + return err; 40.241 +} 40.242 + 40.243 +/** Initialize the vif table. 
40.244 + * 40.245 + * @return 0 on success, error code otherwise 40.246 + */ 40.247 +int vif_init(void){ 40.248 + int err = 0; 40.249 + dprintf(">\n"); 40.250 + vif_table = HashTable_new(0); 40.251 + if(!vif_table){ 40.252 + err = -ENOMEM; 40.253 + goto exit; 40.254 + } 40.255 + vif_table->entry_free_fn = vif_entry_free_fn; 40.256 + vif_table->key_hash_fn = vif_key_hash_fn; 40.257 + vif_table->key_equal_fn = vif_key_equal_fn; 40.258 + 40.259 + // Some vifs for testing. 40.260 + //mkvif(1, "aa:00:00:00:20:11"); 40.261 + //mkvif(2, "aa:00:00:00:20:12"); 40.262 + exit: 40.263 + if(err < 0) wprintf("< err=%d\n", err); 40.264 + dprintf("< err=%d\n", err); 40.265 + return err; 40.266 +} 40.267 + 40.268 +void vif_exit(void){ 40.269 + HashTable_free(vif_table); 40.270 +}
41.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 41.2 +++ b/tools/vnet/vnet-module/vif.h Mon Nov 22 16:49:15 2004 +0000 41.3 @@ -0,0 +1,55 @@ 41.4 +/* 41.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 41.6 + * 41.7 + * This program is free software; you can redistribute it and/or modify 41.8 + * it under the terms of the GNU General Public License as published by the 41.9 + * Free Software Foundation; either version 2 of the License, or (at your 41.10 + * option) any later version. 41.11 + * 41.12 + * This program is distributed in the hope that it will be useful, but 41.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 41.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 41.15 + * for more details. 41.16 + * 41.17 + * You should have received a copy of the GNU General Public License along 41.18 + * with this program; if not, write to the Free software Foundation, Inc., 41.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 41.20 + * 41.21 + */ 41.22 +#ifndef _VNET_VIF_H_ 41.23 +#define _VNET_VIF_H_ 41.24 + 41.25 +#include <if_varp.h> 41.26 +struct net_device; 41.27 + 41.28 +/** Key for entries in the vif table. 
*/ 41.29 +typedef struct VifKey { 41.30 + int vnet; 41.31 + Vmac vmac; 41.32 +} VifKey; 41.33 + 41.34 +typedef struct Vif { 41.35 + int vnet; 41.36 + Vmac vmac; 41.37 + struct net_device *dev; 41.38 + atomic_t refcount; 41.39 +} Vif; 41.40 + 41.41 +struct HashTable; 41.42 +extern struct HashTable *vif_table; 41.43 + 41.44 +extern void vif_decref(Vif *vif); 41.45 +extern void vif_incref(Vif *vif); 41.46 + 41.47 +extern int vif_create(int vnet, Vmac *vmac, Vif **vif); 41.48 + 41.49 +extern int vif_add(int vnet, Vmac *vmac, Vif **vif); 41.50 +extern int vif_lookup(int vnet, Vmac *vmac, Vif **vif); 41.51 +extern int vif_remove(int vnet, Vmac *vmac); 41.52 +extern int vif_find(int vnet, Vmac *vmac, int create, Vif **vif); 41.53 +extern void vif_purge(void); 41.54 + 41.55 +extern int vif_init(void); 41.56 +extern void vif_exit(void); 41.57 + 41.58 +#endif
42.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 42.2 +++ b/tools/vnet/vnet-module/vnet.c Mon Nov 22 16:49:15 2004 +0000 42.3 @@ -0,0 +1,767 @@ 42.4 +/* 42.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 42.6 + * 42.7 + * This program is free software; you can redistribute it and/or modify 42.8 + * it under the terms of the GNU General Public License as published by the 42.9 + * Free Software Foundation; either version 2 of the License, or (at your 42.10 + * option) any later version. 42.11 + * 42.12 + * This program is distributed in the hope that it will be useful, but 42.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 42.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 42.15 + * for more details. 42.16 + * 42.17 + * You should have received a copy of the GNU General Public License along 42.18 + * with this program; if not, write to the Free software Foundation, Inc., 42.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 42.20 + * 42.21 + */ 42.22 + 42.23 +#include <linux/config.h> 42.24 +#include <linux/module.h> 42.25 +#include <linux/types.h> 42.26 +#include <linux/kernel.h> 42.27 +#include <linux/version.h> 42.28 +#include <linux/errno.h> 42.29 + 42.30 +#include <linux/string.h> 42.31 + 42.32 +#include <linux/net.h> 42.33 +#include <linux/in.h> 42.34 +#include <linux/inet.h> 42.35 +#include <linux/netdevice.h> 42.36 + 42.37 +#include <linux/etherdevice.h> 42.38 +#include <net/ip.h> 42.39 +#include <net/protocol.h> 42.40 +#include <net/route.h> 42.41 +#include <linux/skbuff.h> 42.42 +#include <net/checksum.h> 42.43 + 42.44 +#include <tunnel.h> 42.45 +#include <sa.h> 42.46 +#include <varp.h> 42.47 +#include <if_varp.h> 42.48 +#include <esp.h> 42.49 +#include <etherip.h> 42.50 +#include <random.h> 42.51 +#include <tunnel.h> 42.52 + 42.53 +#include <vnet_dev.h> 42.54 +#include <vnet.h> 42.55 +#include <vif.h> 42.56 +#include <vnet_ioctl.h> 42.57 +#include <sa_algorithm.h> 42.58 + 
42.59 +#include "allocate.h" 42.60 +#include "hash_table.h" 42.61 +#include "sys_net.h" 42.62 +#include "sys_string.h" 42.63 + 42.64 +#define MODULE_NAME "VNET" 42.65 +#define DEBUG 1 42.66 +#undef DEBUG 42.67 +#include "debug.h" 42.68 + 42.69 +/** Default vnet security level. 42.70 + */ 42.71 +int vnet_security_default = SA_AUTH ; //| SA_CONF; 42.72 + 42.73 +/** Key for entries in the vnet address table. */ 42.74 +typedef struct VnetAddrKey { 42.75 + /** Vnet id. */ 42.76 + int vnet; 42.77 + /** MAC address. */ 42.78 + unsigned char mac[ETH_ALEN]; 42.79 +} VnetAddrKey; 42.80 + 42.81 +/** The physical vnet. */ 42.82 +Vnet *vnet_physical = NULL; 42.83 + 42.84 +/** Table of vnets indexed by id. */ 42.85 +static HashTable *vnet_table = NULL; 42.86 + 42.87 +/** Decrement reference count, freeing if zero. 42.88 + * 42.89 + * @param info vnet (OK if null) 42.90 + */ 42.91 +void Vnet_decref(Vnet *info){ 42.92 + if(!info) return; 42.93 + if(atomic_dec_and_test(&info->refcount)){ 42.94 + dprintf("> free vnet=%u\n", info->vnet); 42.95 + vnet_dev_remove(info); 42.96 + deallocate(info); 42.97 + } 42.98 +} 42.99 + 42.100 +/** Increment reference count. 42.101 + * 42.102 + * @param info vnet (OK if null) 42.103 + */ 42.104 +void Vnet_incref(Vnet *info){ 42.105 + if(!info) return; 42.106 + atomic_inc(&info->refcount); 42.107 +} 42.108 + 42.109 +/** Allocate a vnet, setting reference count to 1. 42.110 + * 42.111 + * @param info return parameter for vnet 42.112 + * @return 0 on success, error code otherwise 42.113 + */ 42.114 +int Vnet_alloc(Vnet **info){ 42.115 + int err = 0; 42.116 + *info = ALLOCATE(Vnet); 42.117 + if(*info){ 42.118 + atomic_set(&(*info)->refcount, 1); 42.119 + } else { 42.120 + err = -ENOMEM; 42.121 + } 42.122 + return err; 42.123 +} 42.124 + 42.125 +/** Add a vnet to the table under its vnet id. 
42.126 + * 42.127 + * @param info vnet to add 42.128 + * @return 0 on success, error code otherwise 42.129 + */ 42.130 +int Vnet_add(Vnet *info){ 42.131 + int err = 0; 42.132 + HTEntry *entry = NULL; 42.133 + // Vnet_del(info->vnet); //todo: Delete existing vnet info? 42.134 + Vnet_incref(info); 42.135 + entry = HashTable_add(vnet_table, HKEY(info->vnet), info); 42.136 + if(!entry){ 42.137 + err = -ENOMEM; 42.138 + Vnet_decref(info); 42.139 + } 42.140 + return err; 42.141 +} 42.142 + 42.143 +/** Remove a vnet from the table. 42.144 + * 42.145 + * @param vnet id of vnet to remove 42.146 + * @return number of vnets removed 42.147 + */ 42.148 +int Vnet_del(vnetid_t vnet){ 42.149 + return HashTable_remove(vnet_table, HKEY(vnet)); 42.150 +} 42.151 + 42.152 +/** Lookup a vnet by id. 42.153 + * References the vnet on success - the caller must decref. 42.154 + * 42.155 + * @param vnet vnet id 42.156 + * @param info return parameter for vnet 42.157 + * @return 0 on sucess, -ENOENT if no vnet found 42.158 + */ 42.159 +int Vnet_lookup(vnetid_t vnet, Vnet **info){ 42.160 + int err = 0; 42.161 + dprintf("> vnet=%u info=%p\n", vnet, info); 42.162 + dprintf("> vnet_table=%p\n",vnet_table); 42.163 + *info = HashTable_get(vnet_table, HKEY(vnet)); 42.164 + if(*info){ 42.165 + Vnet_incref(*info); 42.166 + } else { 42.167 + err = -ENOENT; 42.168 + } 42.169 + dprintf("< err=%d\n", err); 42.170 + return err; 42.171 +} 42.172 + 42.173 +/** Free an entry in the vnet table. 42.174 + * 42.175 + * @param table containing table 42.176 + * @param entry to free 42.177 + */ 42.178 +static void vnet_entry_free_fn(HashTable *table, HTEntry *entry){ 42.179 + Vnet *info; 42.180 + if(!entry) return; 42.181 + info = entry->value; 42.182 + if(info){ 42.183 + vnet_dev_remove(info); 42.184 + Vnet_decref(info); 42.185 + } 42.186 + HTEntry_free(entry); 42.187 +} 42.188 + 42.189 +/** Setup some vnet entries (for testing). 
42.190 + * Vnet 1 is physical, vnets 2 to 10 are insecure, vnets above 42.191 + * 10 are secure. 42.192 + * 42.193 + * @return 0 on success, negative error code otherwise 42.194 + */ 42.195 +static int vnet_setup(void){ 42.196 + int err = 0; 42.197 + int i, n = 5; //20; 42.198 + int security = vnet_security_default; 42.199 + Vnet *vnet; 42.200 + 42.201 + dprintf(">\n"); 42.202 + for(i=0; i<n; i++){ 42.203 + err = Vnet_alloc(&vnet); 42.204 + if(err) break; 42.205 + vnet->vnet = VNET_VIF + i; 42.206 + vnet->security = (vnet->vnet > 10 ? security : 0); 42.207 + //err = Vnet_add(vnet); 42.208 + err = Vnet_create(vnet); 42.209 + if(err) break; 42.210 + } 42.211 + dprintf("< err=%d\n", err); 42.212 + return err; 42.213 +} 42.214 + 42.215 +/** Initialize the vnet table and the physical vnet. 42.216 + * 42.217 + * @return 0 on success, error code otherwise 42.218 + */ 42.219 +int vnet_init(void){ 42.220 + int err = 0; 42.221 + 42.222 + dprintf(">\n"); 42.223 + vnet_table = HashTable_new(0); 42.224 + dprintf("> vnet_table=%p\n", vnet_table); 42.225 + if(!vnet_table){ 42.226 + err = -ENOMEM; 42.227 + goto exit; 42.228 + } 42.229 + vnet_table->entry_free_fn = vnet_entry_free_fn; 42.230 + 42.231 + err = Vnet_alloc(&vnet_physical); 42.232 + if(err) goto exit; 42.233 + vnet_physical->vnet = VNET_PHYS; 42.234 + vnet_physical->security = 0; 42.235 + err = Vnet_add(vnet_physical); 42.236 + if(err) goto exit; 42.237 + err = vnet_setup(); 42.238 + if(err) goto exit; 42.239 + err = varp_init(); 42.240 + if(err) goto exit; 42.241 + err = vif_init(); 42.242 + exit: 42.243 + if(err < 0) wprintf("< err=%d\n", err); 42.244 + return err; 42.245 +} 42.246 + 42.247 +void vnet_exit(void){ 42.248 + vif_exit(); 42.249 + varp_exit(); 42.250 + HashTable_free(vnet_table); 42.251 + vnet_table = NULL; 42.252 +} 42.253 + 42.254 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 42.255 + 42.256 +static inline int skb_route(struct sk_buff *skb, struct rtable **prt){ 42.257 + int err = 0; 42.258 + struct 
flowi fl = { 42.259 + .oif = skb->dev->ifindex, 42.260 + .nl_u = { 42.261 + .ip4_u = { 42.262 + .daddr = skb->nh.iph->daddr, 42.263 + .saddr = skb->nh.iph->saddr, 42.264 + .tos = skb->nh.iph->tos, 42.265 + } 42.266 + } 42.267 + }; 42.268 + 42.269 + err = ip_route_output_key(prt, &fl); 42.270 + return err; 42.271 +} 42.272 + 42.273 +#else 42.274 + 42.275 +static inline int skb_route(struct sk_buff *skb, struct rtable **prt){ 42.276 + int err = 0; 42.277 + struct rt_key key = { }; 42.278 + key.dst = skb->nh.iph->daddr; 42.279 + key.src = skb->nh.iph->saddr; 42.280 + key.tos = skb->nh.iph->tos; 42.281 + key.oif = skb->dev->ifindex; 42.282 + err = ip_route_output_key(prt, &key); 42.283 + return err; 42.284 +} 42.285 + 42.286 +#endif 42.287 + 42.288 +inline int skb_xmit(struct sk_buff *skb){ 42.289 + int err = 0; 42.290 + struct rtable *rt = NULL; 42.291 + 42.292 + dprintf("> skb=%p dev=%s\n", skb, skb->dev->name); 42.293 + 42.294 + skb->protocol = htons(ETH_P_IP); 42.295 + err = skb_route(skb, &rt); 42.296 + if(err) goto exit; 42.297 + skb->dst = &rt->u.dst; 42.298 + 42.299 + ip_select_ident(skb->nh.iph, &rt->u.dst, NULL); 42.300 + 42.301 + if(skb->nh.iph->saddr == 0){ 42.302 + skb->nh.iph->saddr = rt->rt_src; 42.303 + } 42.304 + 42.305 + skb->nh.iph->check = 0; 42.306 + skb->nh.iph->check = ip_compute_csum(skb->nh.raw, (skb->nh.iph->ihl << 2)); 42.307 + 42.308 + err = neigh_compat_output(skb); 42.309 + 42.310 + exit: 42.311 + dprintf("< err=%d\n", err); 42.312 + return err; 42.313 +} 42.314 + 42.315 +/** Called when a vif sends a packet to the network. 42.316 + * Encapsulates the packet for its vnet and forwards it. 
42.317 + * 42.318 + * @param skb packet 42.319 + * @return 0 on success, error code otherwise 42.320 + * 42.321 + * @todo fixme 42.322 + */ 42.323 +int vnet_skb_send(struct sk_buff *skb, u32 vnet){ 42.324 + int err = 0; 42.325 + Vif *vif = NULL; 42.326 + 42.327 + dprintf("> skb=%p vnet=%u\n", skb, vnet); 42.328 + if(vnet == VNET_PHYS || !vnet){ 42.329 + // For completeness, send direct to the network. 42.330 + if(skb->dev){ 42.331 + err = skb_xmit(skb); 42.332 + } else { 42.333 + // Can't assume eth0 - might be nbe-br or other. Need to route. 42.334 + struct net_device *dev = NULL; 42.335 + err = vnet_get_device(DEVICE, &dev); 42.336 + if(err) goto exit; 42.337 + skb->dev = dev; 42.338 + err = skb_xmit(skb); 42.339 + dev_put(dev); 42.340 + } 42.341 + } else { 42.342 + dprintf("> varp_output\n"); 42.343 + err = varp_output(skb, vnet); 42.344 + } 42.345 + //dprintf("< err=%d\n", err); 42.346 + exit: 42.347 + if(vif) vif_decref(vif); 42.348 + dprintf("< err=%d\n", err); 42.349 + return err; 42.350 +} 42.351 + 42.352 +/** Receive an skb for a vnet. 42.353 + * If the dest is broadcast, goes to all vifs on the vnet. 42.354 + * If the dest is unicast, goes to addressed vif on vnet. 42.355 + * For each vif we set the packet dev and receive the packet. 42.356 + * 42.357 + * The packet must have skb->mac.raw set and skb->data must point 42.358 + * after the device (ethernet) header. 42.359 + * 42.360 + * @param skb packet 42.361 + * @param vnet packet vnet 42.362 + * @param vmac packet vmac 42.363 + * @return 0 on success, error code otherwise 42.364 + */ 42.365 +#if 1 42.366 +int vnet_skb_recv(struct sk_buff *skb, u32 vnet, Vmac *vmac){ 42.367 + // Receive the skb for a vnet. 42.368 + // We make the skb come out of the vif for the vnet, and 42.369 + // let ethernet bridging forward it to related interfaces. 
42.370 + int err = 0; 42.371 + Vnet *info = NULL; 42.372 + 42.373 + dprintf("> vnet=%u mac=%s\n", vnet, mac_ntoa(vmac->mac)); 42.374 + err = Vnet_lookup(vnet, &info); 42.375 + if(err) goto exit; 42.376 + skb->dev = info->dev; 42.377 + dprintf("> netif_rx dev=%s\n", skb->dev->name); 42.378 + netif_rx(skb); 42.379 + exit: 42.380 + if(info) Vnet_decref(info); 42.381 + if(err){ 42.382 + kfree_skb(skb); 42.383 + } 42.384 + dprintf("< err=%d\n", err); 42.385 + return err; 42.386 +} 42.387 + 42.388 +#else 42.389 +int vnet_skb_recv(struct sk_buff *skb, u32 vnet, Vmac *vmac){ 42.390 + int err = 0; 42.391 + Vif *vif = NULL; 42.392 + 42.393 + dprintf("> vnet=%u mac=%s\n", vnet, mac_ntoa(vmac->mac)); 42.394 + if(mac_is_multicast(vmac->mac)){ 42.395 + HashTable_for_decl(entry); 42.396 + int count = 0; 42.397 + struct sk_buff *new_skb; 42.398 + 42.399 + HashTable_for_each(entry, vif_table){ 42.400 + vif = entry->value; 42.401 + if(vif->vnet != vnet) continue; 42.402 + count++; 42.403 + new_skb = skb_copy(skb, GFP_ATOMIC); 42.404 + if(!new_skb) break; 42.405 + new_skb->dev = vif->dev; 42.406 + dprintf("> %d] netif_rx dev=%s\n", count, new_skb->dev->name); 42.407 + netif_rx(new_skb); 42.408 + } 42.409 + kfree_skb(skb); 42.410 + } else { 42.411 + err = vif_lookup(vnet, vmac, &vif); 42.412 + if(err){ 42.413 + kfree_skb(skb); 42.414 + goto exit; 42.415 + } 42.416 + skb->dev = vif->dev; 42.417 + dprintf("> netif_rx dev=%s\n", skb->dev->name); 42.418 + netif_rx(skb); 42.419 + } 42.420 + exit: 42.421 + dprintf("< err=%d\n", err); 42.422 + return err; 42.423 +} 42.424 +#endif 42.425 + 42.426 +/** Check validity of an incoming IP frame. 42.427 + * 42.428 + * @param skb frame 42.429 + * @return 0 if ok, error code otherwise 42.430 + * 42.431 + * @todo fixme Can prob skip most of this because linux will have done it. 42.432 + * @todo Only need the vnet skb context check. 
42.433 + */ 42.434 +int check_ip_frame(struct sk_buff *skb){ 42.435 + int err = -EINVAL; 42.436 + struct iphdr* iph; 42.437 + struct net_device *dev; 42.438 + __u32 len; 42.439 + __u16 check; 42.440 + 42.441 +#if 0 42.442 + if(skb->context){ 42.443 + // Todo: After ESP want to skip most checks (including checksum), 42.444 + // Todo: but in general may not want to skip all checks on detunnel. 42.445 + //dprintf("> Skip check, has context\n"); 42.446 + err = 0; 42.447 + goto exit; 42.448 + } 42.449 +#endif 42.450 + // Check we have enough for an ip header - the skb passed should 42.451 + // have data pointing at the eth header and skb->len should include 42.452 + // that. skb->nh should already have been set. Let the indvidual 42.453 + // protocol handlers worry about the exact ip header len 42.454 + // (i.e. whether any ip options are set). 42.455 + dev = skb->dev; 42.456 + 42.457 + if(skb->len < ETH_HLEN + sizeof(struct iphdr)){ 42.458 + wprintf("> packet too short for ip header\n"); 42.459 + goto exit; 42.460 + } 42.461 + 42.462 + iph = skb->nh.iph; 42.463 + /* 42.464 + * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. 42.465 + * 42.466 + * Is the datagram acceptable? 42.467 + * 42.468 + * 1. Length at least the size of an ip header 42.469 + * 2. Version of 4 42.470 + * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] 42.471 + * 4. 
Doesn't have a bogus length 42.472 + */ 42.473 + if (iph->ihl < 5 || iph->version != 4){ 42.474 + wprintf("> len and version check failed\n"); 42.475 + goto exit; 42.476 + } 42.477 + if(skb->len < ETH_HLEN + (iph->ihl << 2)){ 42.478 + wprintf("> packet too short for given ihl\n"); 42.479 + goto exit; 42.480 + } 42.481 + 42.482 + check = iph->check; 42.483 + //iph->check = 0; 42.484 + //iph->check = compute_cksum((__u16 *)iph, (iph->ihl << 1)); 42.485 + if(iph->check != check){ 42.486 + wprintf("> invalid checksum\n"); 42.487 + goto exit; 42.488 + } 42.489 + 42.490 + len = ntohs(iph->tot_len); 42.491 + if (skb->len < len + ETH_HLEN || len < (iph->ihl << 2)){ 42.492 + wprintf("> packet too short for tot_len\n"); 42.493 + goto exit; 42.494 + } 42.495 + skb->h.raw = skb->nh.raw + (iph->ihl << 2); 42.496 + err = 0; 42.497 + exit: 42.498 + return err; 42.499 +} 42.500 + 42.501 +/** Determine ESP security mode for a new SA. 42.502 + * 42.503 + * @param spi incoming spi 42.504 + * @param protocol incoming protocol 42.505 + * @param addr source address 42.506 + * @return security level or negative error code 42.507 + * 42.508 + * @todo Need to check spi, and do some lookup for security params. 42.509 + */ 42.510 +int vnet_sa_security(u32 spi, int protocol, u32 addr){ 42.511 + int security = vnet_security_default; 42.512 + dprintf("< security=%x\n", security); 42.513 + return security; 42.514 +} 42.515 + 42.516 +/** Create a new SA for incoming traffic. 
42.517 + * 42.518 + * @param spi incoming spi 42.519 + * @param protocol incoming protocol 42.520 + * @param addr source address 42.521 + * @param sa return parameter for SA 42.522 + * @return 0 on success, error code otherwise 42.523 + */ 42.524 +int vnet_sa_create(u32 spi, int protocol, u32 addr, SAState **sa){ 42.525 + int err = 0; 42.526 + int security = vnet_sa_security(spi, protocol, addr); 42.527 + if(security < 0){ 42.528 + err = security; 42.529 + goto exit; 42.530 + } 42.531 + err = sa_create(security, spi, protocol, addr, sa); 42.532 + exit: 42.533 + return err; 42.534 +} 42.535 + 42.536 +/** Check that a context has the correct properties w.r.t. a vnet. 42.537 + * The context must be secure if the vnet requires security. 42.538 + * 42.539 + * @param vnet vnet id 42.540 + * @param context context 42.541 + * @return 0 on success, error code otherwise 42.542 + * 42.543 + * @todo Need to check that the sa provides the correct security level. 42.544 + */ 42.545 +int vnet_check_context(int vnet, SkbContext *context, Vnet **val){ 42.546 + int err = 0; 42.547 + Vnet *info = NULL; 42.548 + SAState *sa = NULL; 42.549 + 42.550 + err = Vnet_lookup(vnet, &info); 42.551 + if(err){ 42.552 + wprintf("> No vnet %d\n", vnet); 42.553 + goto exit; 42.554 + } 42.555 + if(!info->security) goto exit; 42.556 + err = -EINVAL; 42.557 + if(!context){ 42.558 + wprintf("> No security context\n"); 42.559 + goto exit; 42.560 + } 42.561 + if(context->protocol != IPPROTO_ESP){ 42.562 + wprintf("> Invalid protocol: wanted %d, got %d\n", IPPROTO_ESP, context->protocol); 42.563 + goto exit; 42.564 + } 42.565 + sa = context->data; 42.566 + //todo: Check security properties of the SA are correct w.r.t. the vnet. 42.567 + //Something like sa->security == info->security; 42.568 + err = 0; 42.569 + exit: 42.570 + *val = info; 42.571 + return err; 42.572 +} 42.573 + 42.574 +/** Open function for SA tunnels. 
42.575 + * 42.576 + * @param tunnel to open 42.577 + * @return 0 on success, error code otherwise 42.578 + */ 42.579 +static int sa_tunnel_open(Tunnel *tunnel){ 42.580 + int err = 0; 42.581 + //dprintf(">\n"); 42.582 + //dprintf("< err=%d\n", err); 42.583 + return err; 42.584 +} 42.585 + 42.586 +/** Close function for SA tunnels. 42.587 + * 42.588 + * @param tunnel to close (OK if null) 42.589 + */ 42.590 +static void sa_tunnel_close(Tunnel *tunnel){ 42.591 + SAState *sa; 42.592 + dprintf(">\n"); 42.593 + if(!tunnel) return; 42.594 + sa = tunnel->data; 42.595 + if(!sa) return; 42.596 + SAState_decref(sa); 42.597 + tunnel->data = NULL; 42.598 + dprintf("<\n"); 42.599 +} 42.600 + 42.601 +/** Packet send function for SA tunnels. 42.602 + * 42.603 + * @param tunnel to send on 42.604 + * @param skb packet to send 42.605 + * @return 0 on success, negative error code on error 42.606 + */ 42.607 +static int sa_tunnel_send(Tunnel *tunnel, struct sk_buff *skb){ 42.608 + int err = -EINVAL; 42.609 + SAState *sa; 42.610 + //dprintf("> tunnel=%p\n", tunnel); 42.611 + if(!tunnel){ 42.612 + wprintf("> Null tunnel!\n"); 42.613 + goto exit; 42.614 + } 42.615 + sa = tunnel->data; 42.616 + if(!sa){ 42.617 + wprintf("> Null SA!\n"); 42.618 + goto exit; 42.619 + } 42.620 + err = SAState_send(sa, skb, tunnel->base); 42.621 + exit: 42.622 + //dprintf("< err=%d\n", err); 42.623 + return err; 42.624 +} 42.625 + 42.626 +/** Functions used by SA tunnels. */ 42.627 +static TunnelType _sa_tunnel_type = { 42.628 + .name = "SA", 42.629 + .open = sa_tunnel_open, 42.630 + .close = sa_tunnel_close, 42.631 + .send = sa_tunnel_send 42.632 +}; 42.633 + 42.634 +/** Functions used by SA tunnels. */ 42.635 +TunnelType *sa_tunnel_type = &_sa_tunnel_type; 42.636 + 42.637 +/** Open a tunnel for a vnet to a given address. 
42.638 + * 42.639 + * @param vnet vnet id 42.640 + * @param addr destination address 42.641 + * @param tunnel return parameter 42.642 + * @return 0 on success, error code otherwise 42.643 + */ 42.644 +int vnet_tunnel_open(u32 vnet, u32 addr, Tunnel **tunnel){ 42.645 + extern TunnelType *etherip_tunnel_type; 42.646 + int err = 0; 42.647 + Vnet *info = NULL; 42.648 + Tunnel *base_tunnel = NULL; 42.649 + Tunnel *sa_tunnel = NULL; 42.650 + Tunnel *etherip_tunnel = NULL; 42.651 + 42.652 + dprintf("> vnet=%u addr=" IPFMT "\n", vnet, NIPQUAD(addr)); 42.653 + err = Vnet_lookup(vnet, &info); 42.654 + dprintf("> Vnet_lookup=%d\n", err); 42.655 + if(err) goto exit; 42.656 + if(info->security){ 42.657 + SAState *sa = NULL; 42.658 + dprintf("> security=%d\n", info->security); 42.659 + err = Tunnel_create(sa_tunnel_type, vnet, addr, base_tunnel, &sa_tunnel); 42.660 + if(err) goto exit; 42.661 + dprintf("> sa_tunnel=%p\n", sa_tunnel); 42.662 + err = sa_create(info->security, 0, IPPROTO_ESP, addr, &sa); 42.663 + if(err) goto exit; 42.664 + sa_tunnel->data = sa; 42.665 + dprintf("> sa=%p\n", sa); 42.666 + base_tunnel = sa_tunnel; 42.667 + } 42.668 + err = Tunnel_create(etherip_tunnel_type, vnet, addr, base_tunnel, ðerip_tunnel); 42.669 + if(err) goto exit; 42.670 + err = Tunnel_add(etherip_tunnel); 42.671 + exit: 42.672 + Tunnel_decref(sa_tunnel); 42.673 + Vnet_decref(info); 42.674 + if(err){ 42.675 + *tunnel = NULL; 42.676 + } else { 42.677 + *tunnel = etherip_tunnel; 42.678 + } 42.679 + dprintf("< err=%d\n", err); 42.680 + return err; 42.681 +} 42.682 + 42.683 +/** Lookup a tunnel for a vnet to a given address. 42.684 + * Uses an existing tunnel if there is one. 
42.685 + * 42.686 + * @param vnet vnet id 42.687 + * @param addr care-of address 42.688 + * @param tunnel return parameter 42.689 + * @return 0 on success, error code otherwise 42.690 + */ 42.691 +int vnet_tunnel_lookup(u32 vnet, u32 addr, Tunnel **tunnel){ 42.692 + int err = 0; 42.693 + dprintf("> vnet=%d addr=" IPFMT "\n", vnet, NIPQUAD(addr)); 42.694 + *tunnel = Tunnel_lookup(vnet, addr); 42.695 + if(!*tunnel){ 42.696 + err = vnet_tunnel_open(vnet, addr, tunnel); 42.697 + } 42.698 + dprintf("< err=%d\n", err); 42.699 + return err; 42.700 +} 42.701 + 42.702 +/** Send a packet on the appropriate tunnel. 42.703 + * 42.704 + * @param vnet vnet 42.705 + * @param addr tunnel endpoint 42.706 + * @param skb packet 42.707 + * @return 0 on success, error code otherwise 42.708 + */ 42.709 +int vnet_tunnel_send(vnetid_t vnet, vnetaddr_t addr, struct sk_buff *skb){ 42.710 + int err = 0; 42.711 + Tunnel *tunnel = NULL; 42.712 + dprintf("> vnet=%u addr=" IPFMT "\n", vnet, NIPQUAD(addr)); 42.713 + err = vnet_tunnel_lookup(vnet, addr, &tunnel); 42.714 + if(err) goto exit; 42.715 + err = Tunnel_send(tunnel, skb); 42.716 + Tunnel_decref(tunnel); 42.717 + exit: 42.718 + dprintf("< err=%d\n", err); 42.719 + return err; 42.720 +} 42.721 + 42.722 +static void __exit vnet_module_exit(void){ 42.723 + ProcFS_exit(); 42.724 + sa_table_exit(); 42.725 + vnet_exit(); 42.726 + esp_module_exit(); 42.727 + etherip_module_exit(); 42.728 + tunnel_module_init(); 42.729 + random_module_exit(); 42.730 +} 42.731 + 42.732 +/** Initialize the vnet module. 42.733 + * Failure is fatal. 
42.734 + * 42.735 + * @return 0 on success, error code otherwise 42.736 + */ 42.737 +static int __init vnet_module_init(void){ 42.738 + int err = 0; 42.739 + 42.740 + dprintf(">\n"); 42.741 + err = random_module_init(); 42.742 + if(err) wprintf("> random_module_init err=%d\n", err); 42.743 + if(err) goto exit; 42.744 + err = tunnel_module_init(); 42.745 + if(err) wprintf("> tunnel_module_init err=%d\n", err); 42.746 + if(err) goto exit; 42.747 + err = etherip_module_init(); 42.748 + if(err) wprintf("> etherip_module_init err=%d\n", err); 42.749 + if(err) goto exit; 42.750 + err = esp_module_init(); 42.751 + if(err) wprintf("> esp_module_init err=%d\n", err); 42.752 + if(err) goto exit; 42.753 + err = vnet_init(); 42.754 + if(err) wprintf("> vnet_init err=%d\n", err); 42.755 + if(err) goto exit; 42.756 + sa_algorithm_probe_all(); 42.757 + err = sa_table_init(); 42.758 + if(err) wprintf("> sa_table_init err=%d\n", err); 42.759 + ProcFS_init(); 42.760 + exit: 42.761 + if(err < 0){ 42.762 + vnet_module_exit(); 42.763 + } 42.764 + if(err < 0) wprintf("< err=%d\n", err); 42.765 + return err; 42.766 +} 42.767 + 42.768 +module_init(vnet_module_init); 42.769 +module_exit(vnet_module_exit); 42.770 +MODULE_LICENSE("GPL");
43.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 43.2 +++ b/tools/vnet/vnet-module/vnet.h Mon Nov 22 16:49:15 2004 +0000 43.3 @@ -0,0 +1,88 @@ 43.4 +/* 43.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 43.6 + * 43.7 + * This program is free software; you can redistribute it and/or modify 43.8 + * it under the terms of the GNU General Public License as published by the 43.9 + * Free Software Foundation; either version 2 of the License, or (at your 43.10 + * option) any later version. 43.11 + * 43.12 + * This program is distributed in the hope that it will be useful, but 43.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 43.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 43.15 + * for more details. 43.16 + * 43.17 + * You should have received a copy of the GNU General Public License along 43.18 + * with this program; if not, write to the Free software Foundation, Inc., 43.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 43.20 + * 43.21 + */ 43.22 +#ifndef __VNET_VNET_H__ 43.23 +#define __VNET_VNET_H__ 43.24 + 43.25 +#include <asm/atomic.h> 43.26 +#include <linux/skbuff.h> 43.27 + 43.28 +#include <tunnel.h> 43.29 +#include <skb_context.h> 43.30 + 43.31 +struct Vmac; 43.32 +struct Vif; 43.33 +struct net_device; 43.34 + 43.35 +typedef uint32_t vnetid_t; 43.36 +typedef uint32_t vnetaddr_t; 43.37 + 43.38 +/** Vnet property record. */ 43.39 +typedef struct Vnet { 43.40 + /** Reference count. */ 43.41 + atomic_t refcount; 43.42 + /** Vnet id. */ 43.43 + vnetid_t vnet; 43.44 + /** Security flag. If true the vnet requires ESP. */ 43.45 + int security; 43.46 + 43.47 + struct net_device *dev; 43.48 + struct net_device *bridge; 43.49 + 43.50 + /** Max size of the header. */ 43.51 + int header_n; 43.52 + /** Statistics. 
*/ 43.53 + struct net_device_stats stats; 43.54 + int recursion; 43.55 +} Vnet; 43.56 + 43.57 +extern int Vnet_lookup(vnetid_t id, Vnet **vnet); 43.58 +extern int Vnet_add(Vnet *vnet); 43.59 +extern int Vnet_del(vnetid_t vnet); 43.60 +extern void Vnet_incref(Vnet *); 43.61 +extern void Vnet_decref(Vnet *); 43.62 +extern int Vnet_alloc(Vnet **vnet); 43.63 +extern Vnet *vnet_physical; 43.64 + 43.65 +extern int skb_xmit(struct sk_buff *skb); 43.66 +extern int vnet_skb_send(struct sk_buff *skb, u32 vnet); 43.67 +extern int vnet_skb_recv(struct sk_buff *skb, u32 vnet, struct Vmac *vmac); 43.68 + 43.69 +extern int vnet_check_context(int vnet, SkbContext *context, Vnet **vinfo); 43.70 + 43.71 +extern int vnet_tunnel_open(vnetid_t vnet, vnetaddr_t addr, Tunnel **tunnel); 43.72 +extern int vnet_tunnel_lookup(vnetid_t vnet, vnetaddr_t addr, Tunnel **tunnel); 43.73 +extern int vnet_tunnel_send(vnetid_t vnet, vnetaddr_t addr, struct sk_buff *skb); 43.74 + 43.75 +extern int vnet_init(void); 43.76 + 43.77 +enum { 43.78 + HANDLE_OK = 1, 43.79 + HANDLE_NO = 0, 43.80 +}; 43.81 + 43.82 +extern int vnet_sa_security(u32 spi, int protocol, u32 addr); 43.83 +struct SAState; 43.84 +extern int vnet_sa_create(u32 spi, int protocol, u32 addr, struct SAState **sa); 43.85 + 43.86 +enum { 43.87 + VNET_PHYS = 1, 43.88 + VNET_VIF = 2, 43.89 +}; 43.90 + 43.91 +#endif /* !__VNET_VNET_H__ */
44.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 44.2 +++ b/tools/vnet/vnet-module/vnet_dev.c Mon Nov 22 16:49:15 2004 +0000 44.3 @@ -0,0 +1,534 @@ 44.4 +/* 44.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 44.6 + * 44.7 + * This program is free software; you can redistribute it and/or modify 44.8 + * it under the terms of the GNU General Public License as published by the 44.9 + * Free Software Foundation; either version 2 of the License, or (at your 44.10 + * option) any later version. 44.11 + * 44.12 + * This program is distributed in the hope that it will be useful, but 44.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 44.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 44.15 + * for more details. 44.16 + * 44.17 + * You should have received a copy of the GNU General Public License along 44.18 + * with this program; if not, write to the Free software Foundation, Inc., 44.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 44.20 + * 44.21 + */ 44.22 +#include <linux/config.h> 44.23 +#include <linux/module.h> 44.24 +#include <linux/types.h> 44.25 +#include <linux/sched.h> 44.26 +#include <linux/kernel.h> 44.27 + 44.28 +#include <linux/skbuff.h> 44.29 +#include <linux/netdevice.h> 44.30 +#include <linux/in.h> 44.31 +#include <linux/tcp.h> 44.32 +#include <linux/udp.h> 44.33 + 44.34 +#include <net/ip.h> 44.35 +#include <net/protocol.h> 44.36 + 44.37 +#include <linux/if_arp.h> 44.38 +#include <linux/in6.h> 44.39 +#include <linux/inetdevice.h> 44.40 +#include <linux/arcdevice.h> 44.41 +#include <linux/if_bridge.h> 44.42 + 44.43 +#include <etherip.h> 44.44 +#include <vnet.h> 44.45 +#include <varp.h> 44.46 +#include <vif.h> 44.47 +#include <vnet_dev.h> 44.48 + 44.49 +#define MODULE_NAME "VNET" 44.50 +#define DEBUG 1 44.51 +#undef DEBUG 44.52 +#include "debug.h" 44.53 + 44.54 +#define VNETIF_FMT "vnetif%u" 44.55 +#define VNETBR_FMT "vnet%u" 44.56 + 44.57 +#ifndef CONFIG_BRIDGE 44.58 
+#error Must configure ethernet bridging in Network Options 44.59 +#endif 44.60 + 44.61 +#include <linux/../../net/bridge/br_private.h> 44.62 +#define dev_bridge(_dev) ((struct net_bridge *)(_dev)->priv) 44.63 + 44.64 +static void vnet_dev_destructor(struct net_device *dev){ 44.65 + dprintf(">\n"); 44.66 + dev->open = NULL; 44.67 + dev->stop = NULL; 44.68 + dev->uninit = NULL; 44.69 + dev->destructor = NULL; 44.70 + dev->hard_start_xmit = NULL; 44.71 + dev->get_stats = NULL; 44.72 + dev->do_ioctl = NULL; 44.73 + dev->change_mtu = NULL; 44.74 + 44.75 + dev->tx_timeout = NULL; 44.76 + dev->set_multicast_list = NULL; 44.77 + dev->flags = 0; 44.78 + 44.79 + dev->priv = NULL; 44.80 +} 44.81 + 44.82 +static void vnet_dev_uninit(struct net_device *dev){ 44.83 + //Vnet *vnet = dev->priv; 44.84 + dprintf(">\n"); 44.85 + //dev_put(dev); 44.86 + dprintf("<\n"); 44.87 +} 44.88 + 44.89 +static struct net_device_stats *vnet_dev_get_stats(struct net_device *dev){ 44.90 + Vnet *vnet = dev->priv; 44.91 + //dprintf(">\n"); 44.92 + return &vnet->stats; 44.93 +} 44.94 + 44.95 +static int vnet_dev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd){ 44.96 + int err = 0; 44.97 + 44.98 + dprintf(">\n"); 44.99 + return err; 44.100 +} 44.101 + 44.102 +static int vnet_dev_change_mtu(struct net_device *dev, int mtu){ 44.103 + int err = 0; 44.104 + Vnet *vnet = dev->priv; 44.105 + if (mtu < 68 || mtu > 1500 - vnet->header_n){ 44.106 + err = -EINVAL; 44.107 + goto exit; 44.108 + } 44.109 + dev->mtu = mtu; 44.110 + exit: 44.111 + return err; 44.112 +} 44.113 + 44.114 +static int vnet_dev_set_name(struct net_device *dev){ 44.115 + int err = 0; 44.116 + Vnet *vnet = (void*)dev->priv; 44.117 + 44.118 + dprintf(">\n"); 44.119 + dprintf("> vnet=%d\n", vnet->vnet); 44.120 + snprintf(dev->name, IFNAMSIZ - 1, VNETIF_FMT, vnet->vnet); 44.121 + if(__dev_get_by_name(dev->name)){ 44.122 + err = -ENOMEM; 44.123 + } 44.124 + dprintf("< err=%d\n", err); 44.125 + return err; 44.126 +} 44.127 + 44.128 
+//============================================================================ 44.129 +#ifdef CONFIG_VNET_BRIDGE 44.130 + 44.131 +#define BRIDGE DEVICE 44.132 + 44.133 +void vnet_bridge_fini(Vnet *vnet){ 44.134 + if(!vnet) return; 44.135 + if(vnet->bridge){ 44.136 + br_del_bridge(vnet->bridge->name); 44.137 + vnet->bridge = NULL; 44.138 + } 44.139 +} 44.140 + 44.141 +/** Create the bridge for a vnet, and add the 44.142 + * vnet interface to it. 44.143 + * 44.144 + * @param vnet vnet 44.145 + * @return 0 on success, error code otherwise 44.146 + */ 44.147 +int vnet_bridge_init(Vnet *vnet){ 44.148 + int err = 0; 44.149 + char bridge[IFNAMSIZ] = {}; 44.150 + struct net_bridge *br; 44.151 + vnet->bridge = NULL; 44.152 + snprintf(bridge, IFNAMSIZ - 1, VNETBR_FMT, vnet->vnet); 44.153 + rtnl_lock(); 44.154 + err = br_add_bridge(bridge); 44.155 + rtnl_unlock(); 44.156 + if(err){ 44.157 + dprintf("> Error creating vnet bridge %s: err=%d\n", bridge, err); 44.158 + goto exit; 44.159 + } 44.160 + vnet->bridge = __dev_get_by_name(bridge); 44.161 + if(!vnet->bridge){ 44.162 + wprintf("> Vnet bridge %s is null!\n", bridge); 44.163 + err = -EINVAL; 44.164 + goto exit; 44.165 + } 44.166 + br = dev_bridge(vnet->bridge); 44.167 + br->stp_enabled = 0; 44.168 + br->bridge_hello_time = 0; 44.169 + br->hello_time = 0; 44.170 + br->bridge_forward_delay = 0; 44.171 + br->forward_delay = 0; 44.172 + rtnl_lock(); 44.173 + err = br_add_if(br, vnet->dev); 44.174 + rtnl_unlock(); 44.175 + if(err){ 44.176 + dprintf("> Error adding vif %s to vnet bridge %s: err=%d\n", 44.177 + vnet->dev->name, bridge, err); 44.178 + goto exit; 44.179 + } 44.180 + rtnl_lock(); 44.181 + dev_open(vnet->dev); 44.182 + dev_open(vnet->bridge); 44.183 + rtnl_unlock(); 44.184 + exit: 44.185 + if(err){ 44.186 + if(vnet->bridge){ 44.187 + rtnl_lock(); 44.188 + br_del_bridge(bridge); 44.189 + rtnl_unlock(); 44.190 + vnet->bridge = NULL; 44.191 + } 44.192 + } 44.193 + return err; 44.194 +} 44.195 + 44.196 + 44.197 +/** Add 
an interface to the bridge for a vnet. 44.198 + * 44.199 + * @param vnet vnet 44.200 + * @param dev interface 44.201 + * @return 0 on success, error code otherwise 44.202 + */ 44.203 +int vnet_add_if(Vnet *vnet, struct net_device *dev){ 44.204 + int err = 0; 44.205 + struct net_device *brdev; 44.206 + 44.207 + dprintf(">\n"); 44.208 + if(!vnet->bridge){ 44.209 + err = -EINVAL; 44.210 + goto exit; 44.211 + } 44.212 + // Delete the interface from the default bridge. 44.213 + // todo: Really want to delete it from any bridge it's in. 44.214 + if(!vnet_get_device(BRIDGE, &brdev)){ 44.215 + rtnl_lock(); 44.216 + br_del_if(dev_bridge(brdev), dev); 44.217 + rtnl_unlock(); 44.218 + } 44.219 + dprintf("> br_add_if %s %s\n", vnet->bridge->name, dev->name); 44.220 + rtnl_lock(); 44.221 + dev_open(dev); 44.222 + dev_open(vnet->bridge); 44.223 + err = br_add_if(dev_bridge(vnet->bridge), dev); 44.224 + rtnl_unlock(); 44.225 + exit: 44.226 + dprintf("< err=%d\n", err); 44.227 + return err; 44.228 +} 44.229 + 44.230 +int vnet_del_if(Vnet *vnet, struct net_device *dev){ 44.231 + int err = 0; 44.232 + 44.233 + dprintf(">\n"); 44.234 + if(!vnet->bridge){ 44.235 + err = -EINVAL; 44.236 + goto exit; 44.237 + } 44.238 + rtnl_lock(); 44.239 + br_del_if(dev_bridge(vnet->bridge), dev); 44.240 + rtnl_unlock(); 44.241 + exit: 44.242 + dprintf("< err=%d\n", err); 44.243 + return err; 44.244 +} 44.245 + 44.246 + 44.247 +/** Create the bridge and virtual interface for a vnet. 
44.248 + * 44.249 + * @param info vnet 44.250 + * @return 0 on success, error code otherwise 44.251 + */ 44.252 +int Vnet_create(Vnet *info){ 44.253 + int err = 0; 44.254 + 44.255 + dprintf("> %u\n", info->vnet); 44.256 + err = vnet_dev_add(info); 44.257 + if(err) goto exit; 44.258 + dprintf("> vnet_bridge_init\n"); 44.259 + err = vnet_bridge_init(info); 44.260 + if(err) goto exit; 44.261 + dprintf("> Vnet_add...\n"); 44.262 + err = Vnet_add(info); 44.263 + exit: 44.264 + if(err){ 44.265 + dprintf("> vnet_bridge_fini...\n"); 44.266 + vnet_bridge_fini(info); 44.267 + } 44.268 + dprintf("< err=%d\n", err); 44.269 + return err; 44.270 +} 44.271 + 44.272 + 44.273 + 44.274 +/** Remove the net device for a vnet. 44.275 + * Clears the dev field of the vnet. 44.276 + * Safe to call if the vnet or its dev are null. 44.277 + * 44.278 + * @param vnet vnet 44.279 + */ 44.280 +void vnet_dev_remove(Vnet *vnet){ 44.281 + if(!vnet) return; 44.282 + dprintf("> vnet=%u\n", vnet->vnet); 44.283 + if(vnet->bridge){ 44.284 + dprintf("> br_del_bridge(%s)\n", vnet->bridge->name); 44.285 + rtnl_lock(); 44.286 + br_del_bridge(vnet->bridge->name); 44.287 + rtnl_unlock(); 44.288 + vnet->bridge = NULL; 44.289 + } 44.290 + if(vnet->dev){ 44.291 + //dev_put(vnet->dev); 44.292 + dprintf("> unregister_netdev(%s)\n", vnet->dev->name); 44.293 + unregister_netdev(vnet->dev); 44.294 + vnet->dev = NULL; 44.295 + } 44.296 + dprintf("<\n"); 44.297 +} 44.298 + 44.299 +//============================================================================ 44.300 +#else 44.301 +//============================================================================ 44.302 + 44.303 +/** Create the virtual interface for a vnet. 
44.304 + * 44.305 + * @param info vnet 44.306 + * @return 0 on success, error code otherwise 44.307 + */ 44.308 +int Vnet_create(Vnet *info){ 44.309 + int err = 0; 44.310 + 44.311 + dprintf("> %u\n", info->vnet); 44.312 + err = vnet_dev_add(info); 44.313 + if(err) goto exit; 44.314 + dprintf("> Vnet_add...\n"); 44.315 + err = Vnet_add(info); 44.316 + exit: 44.317 + dprintf("< err=%d\n", err); 44.318 + return err; 44.319 +} 44.320 + 44.321 +int vnet_add_if(Vnet *vnet, struct net_device *dev){ 44.322 + int err = -ENOSYS; 44.323 + return err; 44.324 +} 44.325 + 44.326 + 44.327 +int vnet_del_if(Vnet *vnet, struct net_device *dev){ 44.328 + int err = 0; 44.329 + return err; 44.330 +} 44.331 + 44.332 +/** Remove the net device for a vnet. 44.333 + * Clears the dev field of the vnet. 44.334 + * Safe to call if the vnet or its dev are null. 44.335 + * 44.336 + * @param vnet vnet 44.337 + */ 44.338 +void vnet_dev_remove(Vnet *vnet){ 44.339 + if(!vnet) return; 44.340 + dprintf("> vnet=%u\n", vnet->vnet); 44.341 + if(vnet->dev){ 44.342 + //dev_put(vnet->dev); 44.343 + dprintf("> unregister_netdev(%s)\n", vnet->dev->name); 44.344 + unregister_netdev(vnet->dev); 44.345 + vnet->dev = NULL; 44.346 + } 44.347 + dprintf("<\n"); 44.348 +} 44.349 +#endif 44.350 +//============================================================================ 44.351 + 44.352 +static int vnet_dev_open(struct net_device *dev){ 44.353 + int err = 0; 44.354 + dprintf(">\n"); 44.355 + netif_start_queue(dev); 44.356 + dprintf("<\n"); 44.357 + return err; 44.358 +} 44.359 + 44.360 +static int vnet_dev_stop(struct net_device *dev){ 44.361 + int err = 0; 44.362 + dprintf(">\n"); 44.363 + netif_stop_queue(dev); 44.364 + dprintf("<\n"); 44.365 + return err; 44.366 +} 44.367 + 44.368 +static int vnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev){ 44.369 + int err = 0; 44.370 + Vnet *vnet = dev->priv; 44.371 + 44.372 + dprintf("> skb=%p\n", skb); 44.373 + if(vnet->recursion++) { 44.374 + 
vnet->stats.collisions++; 44.375 + vnet->stats.tx_errors++; 44.376 + wprintf("> recursion!\n"); 44.377 + dev_kfree_skb(skb); 44.378 + goto exit; 44.379 + } 44.380 + if(!skb){ 44.381 + err = -EINVAL; 44.382 + wprintf("> skb NULL!\n"); 44.383 + goto exit; 44.384 + } 44.385 + dprintf("> skb->data=%p skb->mac.raw=%p\n", skb->data, skb->mac.raw); 44.386 + if(skb->mac.raw < skb->data || skb->mac.raw > skb->nh.raw){ 44.387 + wprintf("> skb mac duff!\n"); 44.388 + skb->mac.raw = skb->data; 44.389 + } 44.390 + //dev->trans_start = jiffies; 44.391 + err = vnet_skb_send(skb, vnet->vnet); 44.392 + if(err < 0){ 44.393 + vnet->stats.tx_errors++; 44.394 + } else { 44.395 + vnet->stats.tx_packets++; 44.396 + vnet->stats.tx_bytes += skb->len; 44.397 + } 44.398 + exit: 44.399 + vnet->recursion--; 44.400 + dprintf("<\n"); 44.401 + return 0; 44.402 +} 44.403 + 44.404 +void vnet_dev_tx_timeout(struct net_device *dev){ 44.405 + dprintf(">\n"); 44.406 + //dev->trans_start = jiffies; 44.407 + //netif_wake_queue(dev); 44.408 +} 44.409 + 44.410 +void vnet_dev_set_multicast_list(struct net_device *dev){ 44.411 + dprintf(">\n"); 44.412 +} 44.413 + 44.414 +static int (*eth_hard_header)(struct sk_buff *skb, 44.415 + struct net_device *dev, unsigned short type, 44.416 + void *daddr, void *saddr, unsigned len) = NULL; 44.417 + 44.418 +static int vnet_dev_hard_header(struct sk_buff *skb, 44.419 + struct net_device *dev, unsigned short type, 44.420 + void *daddr, void *saddr, unsigned len){ 44.421 + int err = 0; 44.422 + dprintf("> skb=%p ethhdr=%p dev=%s len=%u\n", 44.423 + skb, skb->mac.raw, dev->name, len); 44.424 + if(saddr){ 44.425 + dprintf("> saddr=" MACFMT "\n", MAC6TUPLE((unsigned char*)saddr)); 44.426 + } else { 44.427 + dprintf("> saddr=NULL\n"); 44.428 + } 44.429 + if(daddr){ 44.430 + dprintf("> daddr=" MACFMT "\n", MAC6TUPLE((unsigned char*)daddr)); 44.431 + } else { 44.432 + dprintf("> daddr=NULL\n"); 44.433 + } 44.434 + err = eth_hard_header(skb, dev, type, daddr, saddr, len); 44.435 
+ dprintf("> eth_hard_header=%d\n", err); 44.436 + skb->mac.raw = skb->data; 44.437 + dprintf("> src=" MACFMT " dst=" MACFMT "\n", 44.438 + MAC6TUPLE(skb->mac.ethernet->h_source), 44.439 + MAC6TUPLE(skb->mac.ethernet->h_dest)); 44.440 + dprintf("< err=%d\n", err); 44.441 + return err; 44.442 +} 44.443 + 44.444 +void vnet_dev_mac(unsigned char *mac){ 44.445 + static unsigned val = 1; 44.446 + struct net_device *dev; 44.447 + 44.448 + if(vnet_get_device(DEVICE, &dev)){ 44.449 + mac[0] = 0xAA; 44.450 + mac[1] = 0xFF; 44.451 + mac[2] = (unsigned char)((val >> 24) & 0xff); 44.452 + mac[3] = (unsigned char)((val >> 16) & 0xff); 44.453 + mac[4] = (unsigned char)((val >> 8) & 0xff); 44.454 + mac[5] = (unsigned char)((val ) & 0xff); 44.455 + val++; 44.456 + } else { 44.457 + memcpy(mac, dev->dev_addr, ETH_ALEN); 44.458 + dev_put(dev); 44.459 + } 44.460 +} 44.461 + 44.462 +static int vnet_dev_init(struct net_device *dev){ 44.463 + int err = 0; 44.464 + Vnet *vnet = (void*)dev->priv; 44.465 + 44.466 + dprintf(">\n"); 44.467 + ether_setup(dev); 44.468 + 44.469 + if(!eth_hard_header) eth_hard_header = dev->hard_header; 44.470 + dev->hard_header = vnet_dev_hard_header; 44.471 + 44.472 + dev->open = vnet_dev_open; 44.473 + dev->stop = vnet_dev_stop; 44.474 + dev->uninit = vnet_dev_uninit; 44.475 + dev->destructor = vnet_dev_destructor; 44.476 + dev->hard_start_xmit = vnet_dev_hard_start_xmit; 44.477 + dev->get_stats = vnet_dev_get_stats; 44.478 + dev->do_ioctl = vnet_dev_do_ioctl; 44.479 + dev->change_mtu = vnet_dev_change_mtu; 44.480 + 44.481 + dev->tx_timeout = vnet_dev_tx_timeout; 44.482 + dev->watchdog_timeo = TX_TIMEOUT; 44.483 + dev->set_multicast_list = vnet_dev_set_multicast_list; 44.484 + 44.485 + dev->hard_header_len += vnet->header_n; 44.486 + dev->mtu -= vnet->header_n; 44.487 + 44.488 + vnet_dev_mac(dev->dev_addr); 44.489 + 44.490 + dev->flags |= IFF_DEBUG; 44.491 + dev->flags |= IFF_PROMISC; 44.492 + dev->flags |= IFF_ALLMULTI; 44.493 + 44.494 + dprintf("<\n"); 
44.495 + return err; 44.496 +} 44.497 + 44.498 +/** Add the interface (net device) for a vnet. 44.499 + * Sets the dev field of the vnet on success. 44.500 + * Does nothing if the vif already has an interface. 44.501 + * 44.502 + * @param vif vif 44.503 + * @return 0 on success, error code otherwise 44.504 + */ 44.505 +int vnet_dev_add(Vnet *vnet){ 44.506 + int err = 0; 44.507 + struct net_device *dev = NULL; 44.508 + 44.509 + dprintf("> vnet=%p\n", vnet); 44.510 + if(vnet->dev) goto exit; 44.511 + vnet->header_n = sizeof(struct iphdr) + sizeof(struct etheriphdr); 44.512 + dev = kmalloc(sizeof(struct net_device), GFP_ATOMIC); 44.513 + if(!dev){ err = -ENOMEM; goto exit; } 44.514 + *dev = (struct net_device){}; 44.515 + dev->priv = vnet; 44.516 + vnet->dev = dev; 44.517 + 44.518 + err = vnet_dev_set_name(dev); 44.519 + if(err) goto exit; 44.520 + vnet_dev_init(dev); 44.521 + dprintf("> name=%s, register_netdev...\n", dev->name); 44.522 + err = register_netdev(dev); 44.523 + dprintf("> register_netdev=%d\n", err); 44.524 + if(err) goto exit; 44.525 + rtnl_lock(); 44.526 + dev_open(dev); 44.527 + rtnl_unlock(); 44.528 + 44.529 + //dev_hold(dev); 44.530 + exit: 44.531 + if(err){ 44.532 + if(dev) kfree(dev); 44.533 + vnet->dev = NULL; 44.534 + } 44.535 + dprintf("< err=%d\n", err); 44.536 + return err; 44.537 +}
45.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 45.2 +++ b/tools/vnet/vnet-module/vnet_dev.h Mon Nov 22 16:49:15 2004 +0000 45.3 @@ -0,0 +1,31 @@ 45.4 +/* 45.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 45.6 + * 45.7 + * This program is free software; you can redistribute it and/or modify 45.8 + * it under the terms of the GNU General Public License as published by the 45.9 + * Free Software Foundation; either version 2 of the License, or (at your 45.10 + * option) any later version. 45.11 + * 45.12 + * This program is distributed in the hope that it will be useful, but 45.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 45.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 45.15 + * for more details. 45.16 + * 45.17 + * You should have received a copy of the GNU General Public License along 45.18 + * with this program; if not, write to the Free software Foundation, Inc., 45.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 45.20 + * 45.21 + */ 45.22 +#ifndef _VNET_VNET_DEV_H_ 45.23 +#define _VNET_VNET_DEV_H_ 45.24 + 45.25 +struct Vnet; 45.26 +struct net_device; 45.27 + 45.28 +extern int vnet_dev_add(struct Vnet *vnet); 45.29 +extern void vnet_dev_remove(struct Vnet *vnet); 45.30 +extern int Vnet_create(struct Vnet *info); 45.31 +extern int vnet_add_if(struct Vnet *vnet, struct net_device *dev); 45.32 +extern int vnet_del_if(struct Vnet *vnet, struct net_device *dev); 45.33 + 45.34 +#endif
46.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 46.2 +++ b/tools/vnet/vnet-module/vnet_ioctl.c Mon Nov 22 16:49:15 2004 +0000 46.3 @@ -0,0 +1,815 @@ 46.4 +/* 46.5 + * Copyright (C) 2004 Mike Wray <mike.wray@hp.com> 46.6 + * 46.7 + * This program is free software; you can redistribute it and/or modify 46.8 + * it under the terms of the GNU General Public License as published by the 46.9 + * Free Software Foundation; either version 2 of the License, or (at your 46.10 + * option) any later version. 46.11 + * 46.12 + * This program is distributed in the hope that it will be useful, but 46.13 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 46.14 + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 46.15 + * for more details. 46.16 + * 46.17 + * You should have received a copy of the GNU General Public License along 46.18 + * with this program; if not, write to the Free software Foundation, Inc., 46.19 + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA 46.20 + * 46.21 + */ 46.22 +#include <linux/config.h> 46.23 +#include <linux/module.h> 46.24 + 46.25 +#include <linux/types.h> 46.26 +#include <linux/kernel.h> 46.27 +#include <linux/errno.h> 46.28 + 46.29 +#include <asm/uaccess.h> 46.30 + 46.31 +#include <linux/slab.h> 46.32 + 46.33 +#include <linux/proc_fs.h> 46.34 +#include <linux/string.h> 46.35 + 46.36 +#include <linux/net.h> 46.37 +#include <linux/in.h> 46.38 +#include <linux/inet.h> 46.39 +#include <linux/netdevice.h> 46.40 + 46.41 +#include <sa.h> 46.42 +#include "vif.h" 46.43 +#include "vnet.h" 46.44 +#include "varp.h" 46.45 +#include "vnet_dev.h" 46.46 + 46.47 +#include "sxpr_parser.h" 46.48 +#include "iostream.h" 46.49 +#include "kernel_stream.h" 46.50 +#include "sys_string.h" 46.51 +#include "sys_net.h" 46.52 + 46.53 +#define MODULE_NAME "VNET" 46.54 +#define DEBUG 1 46.55 +#undef DEBUG 46.56 +#include "debug.h" 46.57 + 46.58 +// Functions to manage vnets. 
46.59 +/* 46.60 + 46.61 +Have to rely on ethernet bridging being configured - but we can't rely 46.62 +on the kernel interface being available to us (it's not exported @!$"%!). 46.63 + 46.64 +Create a vnet N: 46.65 +- create the vnet device vnetifN: using commands to /proc, kernel api 46.66 +- create the vnet bridge vnetN: using brctl in user-space 46.67 +- for best results something should keep track of the mapping vnet id <-> bridge name 46.68 + 46.69 +Add vif device vifD.N to vnet N. 46.70 +- domain is configured with vifD.N on bridge vnetN 46.71 +- vif script adds vif to bridge using brctl 46.72 +- vif script detects that the bridge is a vnet bridge and 46.73 + uses /proc commands to configure the mac on the vnet 46.74 + 46.75 +Wouldn't be hard to add support for specifying vnet keys(s) in 46.76 +the control interface. 46.77 + 46.78 +*/ 46.79 + 46.80 + // id vnet id 46.81 + // security security level 46.82 + // ciphersuite: digest, cipher, keys?? 46.83 +/* Security policy. 46.84 + vnet 46.85 + src: mac 46.86 + dst: mac 46.87 + coa: ip 46.88 + Map vnet x coa -> security (none, auth, conf) 46.89 + 46.90 + Policy, e.g. 
46.91 + - same subnet x vnet 46.92 + - diff subnet x vnet 46.93 + - some subnet x vnet 46.94 + - some host addr x vnet 46.95 + 46.96 + (security (net local) (vnet *) (mode none)) 46.97 + (security (net (not local)) 46.98 + 46.99 + (security (addr, vnet) (local-subnet addr) none) 46.100 + (security (addr, vnet) (not (local-subnet addr)) conf) 46.101 + (security (addr, vnet) (host 15.144.27.80) 46.102 + (security (addr, vnet) (subnet addr 15.144.24.0/24) auth) 46.103 + (security (addr, vnet) t auth) 46.104 + 46.105 + (security (addr local) (mode none)) 46.106 + (security (addr local/16) (mode none)) 46.107 + (security (addr 15.144.0.0/16) (mode auth)) 46.108 + (security (addr 15.0.0.0/8) (mode conf)) 46.109 + (security (addr *) (mode drop)) 46.110 + 46.111 + ?Varp security 46.112 + Use esp too - none, auth, conf, 46.113 + Varp sends broadcasts (requests) and unicasts (replies). 46.114 + Uses UDP. Could send over ESP if needed. 46.115 + For bcast don't know where it goes, so security has to be by vnet. 46.116 + For ucast know where it goes, so could do by vnet and addr. 46.117 + 46.118 + Similar issue for vnets: know where unicast goes but don't know where 46.119 + bcast goes. 46.120 + 46.121 + Simplify: 2 levels 46.122 + local ucast 46.123 + nonlocal ucast, mcast 46.124 + 46.125 + (security (local none) (nonlocal conf)) 46.126 + (security (local auth) (nonlocal conf)) 46.127 + 46.128 + VARP security matches vnet security. 46.129 + 46.130 + */ 46.131 + 46.132 +/** @file 46.133 + * 46.134 + * Kernel interface to files in /proc. 
46.135 + */ 46.136 + 46.137 +#define PROC_ROOT "/proc/" 46.138 +#define PROC_ROOT_LEN 6 46.139 +#define MODULE_ROOT PROC_ROOT "vnet" 46.140 + 46.141 +enum { 46.142 + VNET_POLICY = 1, 46.143 +}; 46.144 + 46.145 +typedef struct proc_dir_entry ProcEntry; 46.146 +typedef struct inode Inode; 46.147 +typedef struct file File; 46.148 + 46.149 +static int proc_open_fn(struct inode *inode, File *file); 46.150 +static ssize_t proc_read_fn(File *file, char *buffer, size_t count, loff_t *offset); 46.151 +static ssize_t proc_write_fn(File *file, const char *buffer, size_t count, loff_t *offset) ; 46.152 +//static int proc_flush_fn(File *file); 46.153 +static loff_t proc_lseek_fn(File * file, loff_t offset, int orig); 46.154 +static int proc_ioctl_fn(struct inode *inode, File *file, unsigned opcode, unsigned long arg); 46.155 +static int proc_release_fn(struct inode *inode, File *file); 46.156 + 46.157 +static int eval(Sxpr exp); 46.158 + 46.159 +static int ProcEntry_has_name(ProcEntry *entry, const char *name, int namelen){ 46.160 + dprintf("> name=%.*s entry=%.*s\n", namelen, name, entry->namelen, entry->name); 46.161 + if(!entry || !entry->low_ino) return FALSE; 46.162 + if(entry->namelen != namelen) return FALSE; 46.163 + return memcmp(name, entry->name, namelen) == 0; 46.164 +} 46.165 + 46.166 +// Set f->f_error on error? 46.167 +// Does interface stop r/w on first error? 46.168 +// Is release called after an error? 
46.169 +// 46.170 + 46.171 +static struct file_operations proc_file_ops = { 46.172 + //owner: THIS_MODULE, 46.173 + open: proc_open_fn, 46.174 + read: proc_read_fn, 46.175 + write: proc_write_fn, 46.176 + //flush: proc_flush_fn, 46.177 + llseek: proc_lseek_fn, 46.178 + ioctl: proc_ioctl_fn, 46.179 + release: proc_release_fn, 46.180 +}; 46.181 + 46.182 +static int proc_get_parser(File *file, Parser **val){ 46.183 + int err = 0; 46.184 + Parser *parser = NULL; 46.185 + parser = file->private_data; 46.186 + if(!parser){ 46.187 + parser = Parser_new(); 46.188 + if(!parser){ 46.189 + err = -ENOMEM; 46.190 + goto exit; 46.191 + } 46.192 + file->private_data = parser; 46.193 + } 46.194 + exit: 46.195 + *val = parser; 46.196 + return err; 46.197 +} 46.198 + 46.199 +static int proc_open_fn(Inode *inode, File *file){ 46.200 + // User open. 46.201 + // Return errcode or 0 on success. 46.202 + // Can stuff data in file->private_data (void*). 46.203 + // Get entry from 46.204 + //ProcEntry *entry = (ProcEntry *)inode->u.generic_ip; 46.205 + //file->private_data = NULL; 46.206 + // Check for user privilege - deny otherwise. 46.207 + // -EACCESS 46.208 + int err = 0; 46.209 + dprintf(">\n"); 46.210 + file->private_data = NULL; 46.211 + return err; 46.212 +} 46.213 + 46.214 +static ssize_t proc_read_fn(File *file, char *buffer, 46.215 + size_t count, loff_t *offset){ 46.216 + // User read. 46.217 + // Copy data to user buffer, increment offset by count, return count. 46.218 + dprintf(">\n"); 46.219 + count = 0; 46.220 + //if(copy_to_user(buffer, data, count)){ 46.221 + // return -EFAULT; 46.222 + //} 46.223 + //*offset += count; 46.224 + return count; 46.225 +} 46.226 + 46.227 +static ssize_t proc_write_fn(File *file, const char *buffer, 46.228 + size_t count, loff_t *offset) { 46.229 + // User write. 46.230 + // Copy data into kernel space from buffer. 46.231 + // Increment offset by count, return count (or code). 
46.232 + int err = 0; 46.233 + char *data = NULL; 46.234 + Parser *parser = NULL; 46.235 + 46.236 + //dprintf("> count=%d\n", count); 46.237 + err = proc_get_parser(file, &parser); 46.238 + if(err) goto exit; 46.239 + data = allocate(count); 46.240 + if(!data){ 46.241 + err = -ENOMEM; 46.242 + goto exit; 46.243 + } 46.244 + err = copy_from_user(data, buffer, count); 46.245 + if(err) goto exit; 46.246 + *offset += count; 46.247 + err = Parser_input(parser, data, count); 46.248 + exit: 46.249 + deallocate(data); 46.250 + err = (err < 0 ? err : count); 46.251 + //dprintf("< err = %d\n", err); 46.252 + return err; 46.253 +} 46.254 + 46.255 +#if 0 46.256 +static int proc_flush_fn(File *file){ 46.257 + // User flush. 46.258 + int writing = (file->f_flags & O_ACCMODE) == O_WRONLY; 46.259 + int f_count = atomic_read(&file->f_count); 46.260 + if (writing && f_count == 1) { 46.261 + ProcEntry *pentry = (ProcEntry *)file->f_dentry->d_inode->u.generic_ip; 46.262 + // ... 46.263 + } 46.264 + return retval; 46.265 +} 46.266 +#endif 46.267 + 46.268 +#ifndef SEEK_SET 46.269 +enum { 46.270 + /** Offset from start. */ 46.271 + SEEK_SET = 0, 46.272 + /** Offset from current position. */ 46.273 + SEEK_CUR = 1, 46.274 + /** Offset from size of file. */ 46.275 + SEEK_END = 2 46.276 +}; 46.277 +#endif /* !SEEK_SET */ 46.278 + 46.279 +static loff_t proc_lseek_fn(File * file, loff_t offset, int from){ 46.280 + // User lseek. 46.281 + dprintf(">\n"); 46.282 + switch(from){ 46.283 + case SEEK_SET: 46.284 + break; 46.285 + case SEEK_CUR: 46.286 + offset += file->f_pos; 46.287 + break; 46.288 + case SEEK_END: 46.289 + return -EINVAL; 46.290 + default: 46.291 + return -EINVAL; 46.292 + } 46.293 + if(offset < 0) return -EINVAL; 46.294 + file->f_pos = offset; 46.295 + return offset; 46.296 +} 46.297 + 46.298 +static int proc_ioctl_fn(Inode *inode, File *file, 46.299 + unsigned opcode, unsigned long arg){ 46.300 + // User ioctl. 
46.301 + dprintf(">\n"); 46.302 + return 0; 46.303 +} 46.304 + 46.305 +static int proc_release_fn(Inode *inode, File *file){ 46.306 + // User close. 46.307 + // Cleanup file->private_data, return errcode. 46.308 + int err = 0; 46.309 + Parser *parser = NULL; 46.310 + Sxpr obj, l; 46.311 + 46.312 + dprintf(">\n"); 46.313 + err = proc_get_parser(file, &parser); 46.314 + if(err) goto exit; 46.315 + err = Parser_input(parser, NULL, 0); 46.316 + if(err) goto exit; 46.317 + obj = parser->val; 46.318 + objprint(iostdout, obj, 0); IOStream_print(iostdout, "\n"); 46.319 + for(l = obj; CONSP(l); l = CDR(l)){ 46.320 + err = eval(CAR(l)); 46.321 + if(err) break; 46.322 + } 46.323 + exit: 46.324 + Parser_free(parser); 46.325 + file->private_data = NULL; 46.326 + dprintf("< err=%d\n", err); 46.327 + return err; 46.328 +} 46.329 + 46.330 +static ProcEntry *proc_fs_root = &proc_root; 46.331 + 46.332 +static int proc_path_init(const char *path, const char **rest){ 46.333 + int err = 0; 46.334 + 46.335 + if(!path){ 46.336 + err = -EINVAL; 46.337 + goto exit; 46.338 + } 46.339 + if(*path == '/'){ 46.340 + if(strncmp(PROC_ROOT, path, PROC_ROOT_LEN)){ 46.341 + err = -EINVAL; 46.342 + } else { 46.343 + path += PROC_ROOT_LEN; 46.344 + } 46.345 + } 46.346 + exit: 46.347 + *rest = path; 46.348 + return err; 46.349 +} 46.350 + 46.351 + 46.352 +/** Parse a path relative to `dir'. If dir is null or the proc root 46.353 + * the path is relative to "/proc/", and the leading "/proc/" may be 46.354 + * supplied. 
46.355 + * 46.356 + */ 46.357 +static ProcEntry * ProcFS_lookup(const char *path, ProcEntry *dir){ 46.358 + const char *pathptr = path, *next = NULL; 46.359 + ProcEntry *entry, *result = NULL; 46.360 + int pathlen; 46.361 + 46.362 + if(dir && (dir != proc_fs_root)){ 46.363 + entry = dir; 46.364 + } else { 46.365 + if(proc_path_init(path, &pathptr)) goto exit; 46.366 + entry = proc_fs_root; 46.367 + } 46.368 + if(!pathptr || !*pathptr) goto exit; 46.369 + while(1){ 46.370 + next = strchr(pathptr, '/'); 46.371 + pathlen = (next ? next - pathptr : strlen(pathptr)); 46.372 + for(entry = entry->subdir; entry ; entry = entry->next) { 46.373 + if(ProcEntry_has_name(entry, pathptr, pathlen)) break; 46.374 + } 46.375 + if (!entry) break; 46.376 + if(!next){ 46.377 + result = entry; 46.378 + break; 46.379 + } 46.380 + pathptr = next + 1; 46.381 + } 46.382 + exit: 46.383 + return result; 46.384 +} 46.385 + 46.386 +static ProcEntry *ProcFS_register(const char *name, ProcEntry *dir, int val){ 46.387 + mode_t mode = 0; 46.388 + ProcEntry *entry; 46.389 + 46.390 + entry = create_proc_entry(name, mode, dir); 46.391 + if(entry){ 46.392 + entry->proc_fops = &proc_file_ops; 46.393 + entry->data = (void*)val; // Whatever data we need. 46.394 + } 46.395 + return entry; 46.396 +} 46.397 + 46.398 +static ProcEntry *ProcFS_mkdir(const char *name, ProcEntry *parent){ 46.399 + ProcEntry *entry = NULL; 46.400 + entry = ProcFS_lookup(name, parent); 46.401 + if(!entry){ 46.402 + const char *path; 46.403 + if(proc_path_init(name, &path)) goto exit; 46.404 + entry = proc_mkdir(path, parent); 46.405 + } 46.406 + exit: 46.407 + return entry; 46.408 +} 46.409 + 46.410 +static void ProcFS_remove(const char *name, ProcEntry *parent){ 46.411 + remove_proc_entry(name, parent); 46.412 +} 46.413 + 46.414 +static void ProcFS_rmrec_entry(ProcEntry *entry){ 46.415 + if(entry){ 46.416 + // Don't want to remove /proc itself! 
46.417 + if(entry->parent == entry) return; 46.418 + while(entry->subdir){ 46.419 + ProcFS_rmrec_entry(entry->subdir); 46.420 + } 46.421 + dprintf("> remove %s\n", entry->name); 46.422 + ProcFS_remove(entry->name, entry->parent); 46.423 + } 46.424 +} 46.425 + 46.426 +static void ProcFS_rmrec(const char *name, ProcEntry *parent){ 46.427 + ProcEntry *entry; 46.428 + 46.429 + dprintf("> name=%s\n", name); 46.430 + entry = ProcFS_lookup(name, parent); 46.431 + if(entry){ 46.432 + ProcFS_rmrec_entry(entry); 46.433 + } 46.434 + dprintf("<\n"); 46.435 +} 46.436 + 46.437 +static int stringof(Sxpr exp, char **s){ 46.438 + int err = 0; 46.439 + if(ATOMP(exp)){ 46.440 + *s = atom_name(exp); 46.441 + } else if(STRINGP(exp)){ 46.442 + *s = string_string(exp); 46.443 + } else { 46.444 + err = -EINVAL; 46.445 + *s = NULL; 46.446 + } 46.447 + return err; 46.448 +} 46.449 + 46.450 +static int child_string(Sxpr exp, Sxpr key, char **s){ 46.451 + int err = 0; 46.452 + Sxpr val = sxpr_child_value(exp, key, ONONE); 46.453 + err = stringof(val, s); 46.454 + return err; 46.455 +} 46.456 + 46.457 +static int intof(Sxpr exp, int *v){ 46.458 + int err = 0; 46.459 + char *s; 46.460 + unsigned long l; 46.461 + if(INTP(exp)){ 46.462 + *v = OBJ_INT(exp); 46.463 + } else { 46.464 + err = stringof(exp, &s); 46.465 + if(err) goto exit; 46.466 + err = convert_atoul(s, &l); 46.467 + *v = (int)l; 46.468 + } 46.469 + exit: 46.470 + return err; 46.471 +} 46.472 + 46.473 +static int child_int(Sxpr exp, Sxpr key, int *v){ 46.474 + int err = 0; 46.475 + Sxpr val = sxpr_child_value(exp, key, ONONE); 46.476 + err = intof(val, v); 46.477 + return err; 46.478 +} 46.479 + 46.480 +static int macof(Sxpr exp, unsigned char *v){ 46.481 + int err = 0; 46.482 + char *s; 46.483 + err = stringof(exp, &s); 46.484 + if(err) goto exit; 46.485 + err = mac_aton(s, v); 46.486 + exit: 46.487 + return err; 46.488 +} 46.489 + 46.490 +static int child_mac(Sxpr exp, Sxpr key, unsigned char *v){ 46.491 + int err = 0; 46.492 + 
Sxpr val = sxpr_child_value(exp, key, ONONE); 46.493 + err = macof(val, v); 46.494 + return err; 46.495 +} 46.496 + 46.497 +static int addrof(Sxpr exp, uint32_t *v){ 46.498 + int err = 0; 46.499 + char *s; 46.500 + unsigned long w; 46.501 + err = stringof(exp, &s); 46.502 + if(err) goto exit; 46.503 + err = get_inet_addr(s, &w); 46.504 + if(err) goto exit; 46.505 + *v = (uint32_t)w; 46.506 + exit: 46.507 + return err; 46.508 +} 46.509 + 46.510 +static int child_addr(Sxpr exp, Sxpr key, uint32_t *v){ 46.511 + int err = 0; 46.512 + Sxpr val = sxpr_child_value(exp, key, ONONE); 46.513 + err = addrof(val, v); 46.514 + return err; 46.515 +} 46.516 + 46.517 +/** Create a vnet. 46.518 + * It is an error if a vnet with the same id exists. 46.519 + * 46.520 + * @param vnet vnet id 46.521 + * @param security security level 46.522 + * @return 0 on success, error code otherwise 46.523 + */ 46.524 +static int ctrl_vnet_add(int vnet, int security){ 46.525 + int err = 0; 46.526 + Vnet *vnetinfo = NULL; 46.527 + if(Vnet_lookup(vnet, &vnetinfo) == 0){ 46.528 + err = -EEXIST; 46.529 + goto exit; 46.530 + } 46.531 + err = Vnet_alloc(&vnetinfo); 46.532 + if(err) goto exit; 46.533 + vnetinfo->vnet = vnet; 46.534 + vnetinfo->security = security; 46.535 + err = Vnet_create(vnetinfo); 46.536 + exit: 46.537 + if(vnetinfo) Vnet_decref(vnetinfo); 46.538 + return err; 46.539 +} 46.540 + 46.541 +/** Delete a vnet. 46.542 + * 46.543 + * @param vnet vnet id 46.544 + * @return 0 on success, error code otherwise 46.545 + */ 46.546 +static int ctrl_vnet_del(int vnet){ 46.547 + int err = -ENOSYS; 46.548 + // Can't delete if there are any vifs on the vnet. 46.549 + //Vnet_del(vnet); 46.550 + return err; 46.551 +} 46.552 + 46.553 +/** Create an entry for a vif with the given vnet and vmac. 
46.554 + * 46.555 + * @param vnet vnet id 46.556 + * @param vmac mac address 46.557 + * @return 0 on success, error code otherwise 46.558 + */ 46.559 +static int ctrl_vif_add(int vnet, Vmac *vmac){ 46.560 + int err = 0; 46.561 + Vnet *vnetinfo = NULL; 46.562 + Vif *vif = NULL; 46.563 + 46.564 + dprintf(">\n"); 46.565 + err = Vnet_lookup(vnet, &vnetinfo); 46.566 + if(err) goto exit; 46.567 + err = vif_add(vnet, vmac, &vif); 46.568 + exit: 46.569 + if(vnetinfo) Vnet_decref(vnetinfo); 46.570 + if(vif) vif_decref(vif); 46.571 + dprintf("< err=%d\n", err); 46.572 + return err; 46.573 +} 46.574 + 46.575 +/** Add net device 'vifname' to the bridge for 'vnet' and 46.576 + * create an entry for a vif with the given vnet and vmac. 46.577 + * This is used when device 'vifname' is a virtual device 46.578 + * connected to a vif in a vm. 46.579 + * 46.580 + * @param vifname name of device to bridge 46.581 + * @param vnet vnet id 46.582 + * @param vmac mac address 46.583 + * @return 0 on success, error code otherwise 46.584 + */ 46.585 +static int ctrl_vif_conn(char *vifname, int vnet, Vmac *vmac){ 46.586 + int err = 0; 46.587 + Vnet *vnetinfo = NULL; 46.588 + struct net_device *vifdev = NULL; 46.589 + Vif *vif = NULL; 46.590 + 46.591 + dprintf("> %s\n", vifname); 46.592 + err = Vnet_lookup(vnet, &vnetinfo); 46.593 + if(err) goto exit; 46.594 + err = vif_add(vnet, vmac, &vif); 46.595 + if(err) goto exit; 46.596 + err = vnet_get_device(vifname, &vifdev); 46.597 + if(err) goto exit; 46.598 + vif->dev = vifdev; 46.599 + err = vnet_add_if(vnetinfo, vifdev); 46.600 + exit: 46.601 + if(vnetinfo) Vnet_decref(vnetinfo); 46.602 + if(vif) vif_decref(vif); 46.603 + if(vifdev) dev_put(vifdev); 46.604 + dprintf("< err=%d\n", err); 46.605 + return err; 46.606 +} 46.607 + 46.608 +/** Delete a vif. 
46.609 + * 46.610 + * @param vnet vnet id 46.611 + * @param vmac mac address 46.612 + * @return 0 on success, error code otherwise 46.613 + */ 46.614 +static int ctrl_vif_del(int vnet, Vmac *vmac){ 46.615 + int err = 0; 46.616 + Vnet *vnetinfo = NULL; 46.617 + Vif *vif = NULL; 46.618 + 46.619 + dprintf(">\n"); 46.620 + err = Vnet_lookup(vnet, &vnetinfo); 46.621 + if(err) goto exit; 46.622 + err = vif_lookup(vnet, vmac, &vif); 46.623 + if(err) goto exit; 46.624 + if(vif->dev){ 46.625 + vnet_del_if(vnetinfo, vif->dev); 46.626 + vif->dev = NULL; 46.627 + } 46.628 + vif_remove(vnet, vmac); 46.629 + exit: 46.630 + if(vnetinfo) Vnet_decref(vnetinfo); 46.631 + if(vif) vif_decref(vif); 46.632 + dprintf("< err=%d\n", err); 46.633 + return err; 46.634 +} 46.635 + 46.636 +/** (varp.print) 46.637 + */ 46.638 +static int eval_varp_print(Sxpr exp){ 46.639 + int err = 0; 46.640 + varp_print(); 46.641 + return err; 46.642 +} 46.643 + 46.644 +/** (varp.mcaddr (addr <addr>)) 46.645 + */ 46.646 +static int eval_varp_mcaddr(Sxpr exp){ 46.647 + int err =0; 46.648 + Sxpr oaddr = intern("addr"); 46.649 + uint32_t addr; 46.650 + 46.651 + err = child_addr(exp, oaddr, &addr); 46.652 + if(err < 0) goto exit; 46.653 + varp_set_mcast_addr(addr); 46.654 + exit: 46.655 + return err; 46.656 +} 46.657 + 46.658 +/** (vnet.add (id <id>) [(security { none | auth | conf } )] ) 46.659 + */ 46.660 +static int eval_vnet_add(Sxpr exp){ 46.661 + int err = 0; 46.662 + Sxpr oid = intern("id"); 46.663 + Sxpr osecurity = intern("security"); 46.664 + Sxpr csecurity; 46.665 + int id; 46.666 + char *security; 46.667 + int sec; 46.668 + err = child_int(exp, oid, &id); 46.669 + if(err) goto exit; 46.670 + if(id < VNET_VIF){ 46.671 + err = -EINVAL; 46.672 + goto exit; 46.673 + } 46.674 + csecurity = sxpr_child_value(exp, osecurity, intern("none")); 46.675 + err = stringof(csecurity, &security); 46.676 + if(err) goto exit; 46.677 + if(strcmp(security, "none")==0){ 46.678 + sec = 0; 46.679 + } else if(strcmp(security, 
"auth")==0){ 46.680 + sec = SA_AUTH; 46.681 + } else if(strcmp(security, "conf")==0){ 46.682 + sec = SA_CONF; 46.683 + } else { 46.684 + err = -EINVAL; 46.685 + goto exit; 46.686 + } 46.687 + dprintf("> vnet id=%d\n", id); 46.688 + err = ctrl_vnet_add(id, sec); 46.689 + exit: 46.690 + dprintf("< err=%d\n", err); 46.691 + return err; 46.692 +} 46.693 + 46.694 +/** Delete a vnet. 46.695 + * 46.696 + * (vnet.del (id <id>)) 46.697 + * 46.698 + * @param vnet vnet id 46.699 + * @return 0 on success, error code otherwise 46.700 + */ 46.701 +static int eval_vnet_del(Sxpr exp){ 46.702 + int err = 0; 46.703 + Sxpr oid = intern("id"); 46.704 + int id; 46.705 + 46.706 + err = child_int(exp, oid, &id); 46.707 + if(err) goto exit; 46.708 + err = ctrl_vnet_del(id); 46.709 + exit: 46.710 + return err; 46.711 +} 46.712 + 46.713 +/** (vif.add (vnet <vnet>) (vmac <macaddr>)) 46.714 + */ 46.715 +static int eval_vif_add(Sxpr exp){ 46.716 + int err = 0; 46.717 + Sxpr ovnet = intern("vnet"); 46.718 + Sxpr ovmac = intern("vmac"); 46.719 + int vnet; 46.720 + Vmac vmac = {}; 46.721 + 46.722 + err = child_int(exp, ovnet, &vnet); 46.723 + if(err) goto exit; 46.724 + err = child_mac(exp, ovmac, vmac.mac); 46.725 + if(err) goto exit; 46.726 + err = ctrl_vif_add(vnet, &vmac); 46.727 + exit: 46.728 + return err; 46.729 +} 46.730 + 46.731 +/** (vif.conn (vif <name>) (vnet <id>) (vmac <mac>)) 46.732 + */ 46.733 +static int eval_vif_conn(Sxpr exp){ 46.734 + int err = 0; 46.735 + Sxpr ovif = intern("vif"); 46.736 + Sxpr ovnet = intern("vnet"); 46.737 + Sxpr ovmac = intern("vmac"); 46.738 + char *vif = NULL; 46.739 + int vnet = 0; 46.740 + Vmac vmac = {}; 46.741 + 46.742 + err = child_string(exp, ovif, &vif); 46.743 + if(err) goto exit; 46.744 + err = child_int(exp, ovnet, &vnet); 46.745 + if(err) goto exit; 46.746 + err = child_mac(exp, ovmac, vmac.mac); 46.747 + dprintf("> connect vif=%s vnet=%d\n", vif, vnet); 46.748 + err = ctrl_vif_conn(vif, vnet, &vmac); 46.749 + exit: 46.750 + dprintf("< 
err=%d\n", err); 46.751 + return err; 46.752 +} 46.753 + 46.754 +/** (vif.del (vnet <vnet>) (vmac <macaddr>)) 46.755 + */ 46.756 +static int eval_vif_del(Sxpr exp){ 46.757 + int err = 0; 46.758 + Sxpr ovnet = intern("vnet"); 46.759 + Sxpr ovmac = intern("vmac"); 46.760 + int vnet; 46.761 + Vmac vmac = {}; 46.762 + 46.763 + err = child_int(exp, ovnet, &vnet); 46.764 + if(err) goto exit; 46.765 + err = child_mac(exp, ovmac, vmac.mac); 46.766 + if(err) goto exit; 46.767 + err = ctrl_vif_del(vnet, &vmac); 46.768 + exit: 46.769 + return err; 46.770 +} 46.771 + 46.772 +typedef struct SxprEval { 46.773 + Sxpr elt; 46.774 + int (*fn)(Sxpr); 46.775 +} SxprEval; 46.776 + 46.777 +static int eval(Sxpr exp){ 46.778 + int err = 0; 46.779 + SxprEval defs[] = { 46.780 + { intern("varp.print"), eval_varp_print }, 46.781 + { intern("varp.mcaddr"), eval_varp_mcaddr }, 46.782 + { intern("vif.add"), eval_vif_add }, 46.783 + { intern("vif.conn"), eval_vif_conn }, 46.784 + { intern("vif.del"), eval_vif_del }, 46.785 + { intern("vnet.add"), eval_vnet_add }, 46.786 + { intern("vnet.del"), eval_vnet_del }, 46.787 + { ONONE, NULL } }; 46.788 + SxprEval *def; 46.789 + 46.790 + dprintf(">\n"); 46.791