ia64/xen-unstable

view xen-2.4.16/common/network.c @ 97:ce656d157bbf

bitkeeper revision 1.15.1.9 (3e2fd418NdZYqL3KPr6URqd77DwnJQ)

Add shadow ring in the RX direction to protect buffers from guest
tampering once they have been passed to the hypervisor. This
is under the umbrella of eventual zero-copy network code.
author akw27@plucky.localdomain
date Thu Jan 23 11:38:00 2003 +0000 (2003-01-23)
parents 8ce741992076
children 82679de8a1ca
line source
1 /* network.c
2 *
3 * Network virtualization for Xen. Lower-level network interactions are in
4 * net/dev.c and in the drivers. This file contains routines to interact
5 * with the virtual interfaces (vifs) and the virtual firewall/router through
6 * the use of rules.
7 *
8 * Copyright (c) 2002, A K Warfield and K A Fraser
9 */
11 #include <hypervisor-ifs/network.h>
12 #include <xeno/sched.h>
13 #include <xeno/errno.h>
14 #include <xeno/init.h>
15 #include <xeno/slab.h>
16 #include <xeno/spinlock.h>
17 #include <xeno/if_ether.h>
18 #include <linux/skbuff.h>
19 #include <xeno/netdevice.h>
20 #include <xeno/in.h>
22 /* vif globals
23 * sys_vif_list is a lookup table for vifs, used in packet forwarding.
24 * it will be replaced later by something a little more flexible.
25 */
27 int sys_vif_count; /* global vif count */
28 net_vif_t *sys_vif_list[MAX_SYSTEM_VIFS]; /* global vif array */
29 net_rule_ent_t *net_rule_list; /* global list of rules */
30 kmem_cache_t *net_vif_cache;
31 kmem_cache_t *net_rule_cache;
32 static rwlock_t net_rule_lock = RW_LOCK_UNLOCKED; /* rule mutex */
33 static rwlock_t sys_vif_lock = RW_LOCK_UNLOCKED; /* vif mutex */
35 void print_net_rule_list();
38 /* ----[ VIF Functions ]----------------------------------------------------*/
40 /* create_net_vif - Create a new vif and append it to the specified domain.
41 *
42 * the domain is examined to determine how many vifs currently are allocated
43 * and the newly allocated vif is appended. The vif is also added to the
44 * global list.
45 *
46 */
48 net_vif_t *create_net_vif(int domain)
49 {
50 net_vif_t *new_vif;
51 net_ring_t *new_ring;
52 net_shadow_ring_t *shadow_ring;
53 struct task_struct *dom_task;
55 if ( !(dom_task = find_domain_by_id(domain)) )
56 {
57 return NULL;
58 }
60 if ( (new_vif = kmem_cache_alloc(net_vif_cache, GFP_KERNEL)) == NULL )
61 {
62 return NULL;
63 }
65 new_ring = dom_task->net_ring_base + dom_task->num_net_vifs;
66 memset(new_ring, 0, sizeof(net_ring_t));
68 // allocate the shadow ring.
69 // maybe these should be kmem_cache instead of kmalloc?
71 shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL);
72 if (shadow_ring == NULL) goto fail;
74 shadow_ring->tx_ring = kmalloc(TX_RING_SIZE
75 * sizeof(tx_shadow_entry_t), GFP_KERNEL);
76 shadow_ring->rx_ring = kmalloc(RX_RING_SIZE
77 * sizeof(rx_shadow_entry_t), GFP_KERNEL);
78 if ((shadow_ring->tx_ring == NULL) || (shadow_ring->rx_ring == NULL))
79 goto fail;
81 shadow_ring->rx_prod = 0;
83 // fill in the new vif struct.
85 new_vif->net_ring = new_ring;
86 new_vif->shadow_ring = shadow_ring;
89 skb_queue_head_init(&new_vif->skb_list);
90 new_vif->domain = domain;
92 write_lock(&sys_vif_lock);
93 new_vif->id = sys_vif_count;
94 sys_vif_list[sys_vif_count++] = new_vif;
95 write_unlock(&sys_vif_lock);
97 dom_task->net_vif_list[dom_task->num_net_vifs] = new_vif;
98 dom_task->num_net_vifs++;
100 return new_vif;
102 fail:
103 printk("VIF allocation failed!\n");
104 return NULL;
105 }
107 /* delete_net_vif - Delete the last vif in the given domain.
108 *
109 * There doesn't seem to be any reason (yet) to be able to axe an arbitrary
110 * vif, by vif id.
111 */
113 void destroy_net_vif(struct task_struct *p)
114 {
115 struct sk_buff *skb;
116 int i;
118 if ( p->num_net_vifs <= 0 ) return; // nothing to do.
120 i = --p->num_net_vifs;
121 while ( (skb = skb_dequeue(&p->net_vif_list[i]->skb_list)) != NULL )
122 {
123 kfree_skb(skb);
124 }
126 write_lock(&sys_vif_lock);
127 sys_vif_list[p->net_vif_list[i]->id] = NULL; // system vif list not gc'ed
128 write_unlock(&sys_vif_lock);
130 kfree(p->net_vif_list[i]->shadow_ring->tx_ring);
131 kfree(p->net_vif_list[i]->shadow_ring->rx_ring);
132 kfree(p->net_vif_list[i]->shadow_ring);
133 kmem_cache_free(net_vif_cache, p->net_vif_list[i]);
134 }
136 /* print_vif_list - Print the contents of the global vif table.
137 */
139 void print_vif_list()
140 {
141 int i;
142 net_vif_t *v;
144 printk("Currently, there are %d VIFs.\n", sys_vif_count);
145 for (i=0; i<sys_vif_count; i++)
146 {
147 v = sys_vif_list[i];
148 printk("] VIF Entry %d(%d):\n", i, v->id);
149 printk(" > net_ring*: %p\n", v->net_ring);
150 printk(" > domain : %u\n", v->domain);
151 }
152 }
154 /* ----[ Net Rule Functions ]-----------------------------------------------*/
156 /* add_net_rule - Add a new network filter rule.
157 */
159 int add_net_rule(net_rule_t *rule)
160 {
161 net_rule_ent_t *new_ent;
163 if ( (new_ent = kmem_cache_alloc(net_rule_cache, GFP_KERNEL)) == NULL )
164 {
165 return -ENOMEM;
166 }
168 memcpy(&new_ent->r, rule, sizeof(net_rule_t));
170 write_lock(&net_rule_lock);
171 new_ent->next = net_rule_list;
172 net_rule_list = new_ent;
173 write_unlock(&net_rule_lock);
175 return 0;
176 }
178 /* delete_net_rule - Delete an existing network rule.
179 */
181 int delete_net_rule(net_rule_t *rule)
182 {
183 net_rule_ent_t *ent = net_rule_list, *prev = NULL;
184 while ( (ent) && ((memcmp(rule, &ent->r, sizeof(net_rule_t))) != 0) )
185 {
186 prev = ent;
187 ent = ent->next;
188 }
190 if (ent != NULL)
191 {
192 write_lock(&net_rule_lock);
193 if (prev != NULL)
194 {
195 prev->next = ent->next;
196 }
197 else
198 {
199 net_rule_list = ent->next;
200 }
201 kmem_cache_free(net_rule_cache, ent);
202 write_unlock(&net_rule_lock);
203 }
204 return 0;
205 }
207 /* add_default_net_rule - Set up default network path (ie for dom0).
208 *
209 * this is a utility function to route all traffic with the specified
210 * ip address to the specified vif. It's used to set up domain zero.
211 */
213 void add_default_net_rule(int vif_id, u32 ipaddr)
214 {
215 net_rule_t new_rule;
217 //outbound rule.
218 memset(&new_rule, 0, sizeof(net_rule_t));
219 new_rule.src_addr = ipaddr;
220 new_rule.src_addr_mask = 0xffffffff;
221 new_rule.src_interface = vif_id;
222 new_rule.dst_interface = VIF_PHYSICAL_INTERFACE;
223 new_rule.action = NETWORK_ACTION_ACCEPT;
224 new_rule.proto = NETWORK_PROTO_ANY;
225 add_net_rule(&new_rule);
227 //inbound rule;
228 memset(&new_rule, 0, sizeof(net_rule_t));
229 new_rule.dst_addr = ipaddr;
230 new_rule.dst_addr_mask = 0xffffffff;
231 new_rule.src_interface = VIF_PHYSICAL_INTERFACE;
232 new_rule.dst_interface = vif_id;
233 new_rule.action = NETWORK_ACTION_ACCEPT;
234 new_rule.proto = NETWORK_PROTO_ANY;
235 add_net_rule(&new_rule);
237 }
239 /* print_net_rule - Print a single net rule.
240 */
242 void print_net_rule(net_rule_t *r)
243 {
244 printk("===] NET RULE:\n");
245 printk("=] src_addr : %lu\n", (unsigned long) r->src_addr);
246 printk("=] src_addr_mask : %lu\n", (unsigned long) r->src_addr_mask);
247 printk("=] dst_addr : %lu\n", (unsigned long) r->dst_addr);
248 printk("=] dst_addr_mask : %lu\n", (unsigned long) r->dst_addr_mask);
249 printk("=] src_port : %u\n", r->src_port);
250 printk("=] src_port_mask : %u\n", r->src_port_mask);
251 printk("=] dst_port : %u\n", r->dst_port);
252 printk("=] dst_port_mask : %u\n", r->dst_port_mask);
253 printk("=] dst_proto : %u\n", r->proto);
254 printk("=] src_interface : %d\n", r->src_interface);
255 printk("=] dst_interface : %d\n", r->dst_interface);
256 printk("=] action : %u\n", r->action);
257 }
259 /* print_net_rule_list - Print the global rule table.
260 */
262 void print_net_rule_list()
263 {
264 net_rule_ent_t *ent;
265 int count = 0;
267 read_lock(&net_rule_lock);
269 ent = net_rule_list;
271 while (ent)
272 {
273 print_net_rule(&ent->r);
274 ent = ent->next;
275 count++;
276 }
277 printk("\nTotal of %d rules.\n", count);
279 read_unlock(&net_rule_lock);
280 }
282 /* net_find_rule - Find the destination vif according to the current rules.
283 *
284 * Apply the rules to this skbuff and return the vif id that it is bound for.
285 * If there is no match, VIF_DROP is returned.
286 */
288 int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, u16 src_port, u16 dst_port,
289 int src_vif)
290 {
291 net_rule_ent_t *ent;
292 int dest = VIF_DROP;
294 read_lock(&net_rule_lock);
296 ent = net_rule_list;
298 while (ent)
299 {
300 if ( ( (ent->r.src_interface == src_vif)
301 || (ent->r.src_interface == VIF_ANY_INTERFACE) )
303 && (!((ent->r.src_addr ^ src_addr) & ent->r.src_addr_mask ))
304 && (!((ent->r.dst_addr ^ dst_addr) & ent->r.dst_addr_mask ))
305 && (!((ent->r.src_port ^ src_port) & ent->r.src_port_mask ))
306 && (!((ent->r.dst_port ^ dst_port) & ent->r.dst_port_mask ))
308 && (
309 (ent->r.proto == NETWORK_PROTO_ANY)
310 || ((ent->r.proto == NETWORK_PROTO_IP) && (nproto == (u8)ETH_P_IP))
311 || ((ent->r.proto == NETWORK_PROTO_ARP) && (nproto == (u8)ETH_P_ARP))
312 || ((ent->r.proto == NETWORK_PROTO_TCP) && (tproto == IPPROTO_TCP))
313 || ((ent->r.proto == NETWORK_PROTO_UDP) && (tproto == IPPROTO_UDP))
314 )
315 )
316 {
317 break;
318 }
319 ent = ent->next;
320 }
322 if (ent) (dest = ent->r.dst_interface);
323 read_unlock(&net_rule_lock);
324 return dest;
325 }
327 /* net_get_target_vif - Find the vif that the given sk_buff is bound for.
328 *
329 * This is intended to be the main interface to the VFR rules, where
330 * net_find_rule (above) is a private aspect of the current matching
331 * implementation. All in-hypervisor routing should use this function only
332 * to ensure that this can be rewritten later.
333 *
334 * Currently, network rules are stored in a global linked list. New rules are
335 * added to the front of this list, and (at present) the first matching rule
336 * determines the vif that a packet is sent to. This is obviously not ideal,
337 * it might be more advisable to have chains, or at lest most-specific
338 * matching, and moreover routing latency increases linearly (for old rules)
339 * as new rules are added.
340 *
341 * net_get_target_vif examines the sk_buff and pulls out the relevant fields
342 * based on the packet type. it then calls net_find_rule to scan the rule
343 * list.
344 */
346 int net_get_target_vif(struct sk_buff *skb)
347 {
348 int target = VIF_DROP;
349 skb->h.raw = skb->nh.raw = skb->data;
350 if ( skb->len < 2 ) goto drop;
351 switch ( ntohs(skb->mac.ethernet->h_proto) )
352 {
353 case ETH_P_ARP:
354 if ( skb->len < 28 ) goto drop;
355 target = net_find_rule((u8)ETH_P_ARP, 0, ntohl(*(u32 *)(skb->nh.raw + 14)),
356 ntohl(*(u32 *)(skb->nh.raw + 24)), 0, 0,
357 skb->src_vif);
358 break;
359 case ETH_P_IP:
360 if ( skb->len < 20 ) goto drop;
361 skb->h.raw += ((*(unsigned char *)(skb->nh.raw)) & 0x0f) * 4;
362 switch ( *(unsigned char *)(skb->nh.raw + 9) )
363 {
364 case IPPROTO_TCP:
365 case IPPROTO_UDP:
366 target = net_find_rule((u8)ETH_P_IP, *(u8 *)(skb->nh.raw + 9),
367 ntohl(*(u32 *)(skb->nh.raw + 12)),
368 ntohl(*(u32 *)(skb->nh.raw + 16)),
369 ntohs(*(u16 *)(skb->h.raw)),
370 ntohs(*(u16 *)(skb->h.raw + 2)),
371 skb->src_vif);
372 break;
373 default: // ip-based protocol where we don't have ports.
374 target = net_find_rule((u8)ETH_P_IP, *(u8 *)(skb->nh.raw + 9),
375 ntohl(*(u32 *)(skb->nh.raw + 12)),
376 ntohl(*(u32 *)(skb->nh.raw + 16)),
377 0,
378 0,
379 skb->src_vif);
380 }
381 break;
382 }
383 skb->dst_vif=target;
384 return target;
386 drop:
387 return VIF_DROP;
388 }
390 /* ----[ Syscall Interface ]------------------------------------------------*/
392 /*
393 * This is the hook function to handle guest-invoked traps requesting
394 * changes to the network system.
395 */
397 long do_network_op(network_op_t *u_network_op)
398 {
399 long ret=0;
400 network_op_t op;
402 if ( current->domain != 0 )
403 return -EPERM;
405 if ( copy_from_user(&op, u_network_op, sizeof(op)) )
406 return -EFAULT;
407 switch ( op.cmd )
408 {
410 case NETWORK_OP_ADDRULE:
411 {
412 add_net_rule(&op.u.net_rule);
413 }
414 break;
416 case NETWORK_OP_DELETERULE:
417 {
418 delete_net_rule(&op.u.net_rule);
419 }
420 break;
422 case NETWORK_OP_GETRULELIST:
423 {
424 // This should eventually ship a rule list up to the VM
425 // to be printed in its procfs. For now, we just print the rules.
427 print_net_rule_list();
428 }
429 break;
431 default:
432 ret = -ENOSYS;
433 }
435 return ret;
436 }
438 void __init net_init (void)
439 {
440 sys_vif_count = 0;
441 memset(sys_vif_list, 0, sizeof(sys_vif_list));
442 net_rule_list = NULL;
443 net_vif_cache = kmem_cache_create("net_vif_cache", sizeof(net_vif_t),
444 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
445 net_rule_cache = kmem_cache_create("net_rule_cache", sizeof(net_rule_ent_t),
446 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
447 }