ia64/xen-unstable

view tools/vnet/vnet-module/skb_util.c @ 6946:e703abaf6e3d

Add behaviour to the remove methods to remove the transaction's path itself. This allows us to write Remove(path) to remove the specified path rather than having to slice the path ourselves.
author emellor@ewan
date Sun Sep 18 14:42:13 2005 +0100 (2005-09-18)
parents 0a4b76b6b5a0
children 71b0f00f6344
line source
1 /*
2 * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; either version 2 of the License, or (at your
7 * option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free software Foundation, Inc.,
16 * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19 #include <linux/config.h>
20 #include <linux/module.h>
21 #include <linux/kernel.h>
22 #include <linux/init.h>
23 #include <linux/version.h>
25 #include <asm/scatterlist.h>
26 #include <linux/crypto.h>
27 #include <linux/pfkeyv2.h>
28 #include <linux/random.h>
30 #include <linux/net.h>
31 #include <linux/in.h>
32 #include <linux/inet.h>
33 #include <linux/netdevice.h>
34 #include <linux/tcp.h>
35 #include <linux/udp.h>
37 #include <net/ip.h>
38 #include <net/protocol.h>
39 #include <net/route.h>
40 #include <linux/skbuff.h>
42 #include <varp.h>
43 #include <skb_util.h>
45 #define MODULE_NAME "VNET"
46 #define DEBUG 1
47 #undef DEBUG
48 #include "debug.h"
50 static const int DEBUG_SCATTERLIST = 0;
51 static const int DEBUG_SKB = 0;
53 //============================================================================
54 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
55 #define SET_SCATTER_ADDR(sg, addr) do{} while(0)
56 #else
57 #define SET_SCATTER_ADDR(sg, addr) (sg).address = (addr)
58 #endif
60 /** Make enough room in an skb for extra header and trailer.
61 *
62 * @param pskb return parameter for expanded skb
63 * @param skb skb
64 * @param head_n required headroom
65 * @param tail_n required tailroom
66 * @return 0 on success, error code otherwise
67 */
68 int skb_make_room(struct sk_buff **pskb, struct sk_buff *skb, int head_n, int tail_n){
69 int err = 0;
70 int has_headroom = (head_n <= skb_headroom(skb));
71 int has_tailroom = (tail_n <= skb_tailroom(skb));
72 int writeable = !skb_cloned(skb) && !skb_shared(skb);
74 dprintf("> skb=%p headroom=%d head_n=%d tailroom=%d tail_n=%d\n",
75 skb,
76 skb_headroom(skb), head_n,
77 skb_tailroom(skb), tail_n);
78 if(writeable && has_headroom && has_tailroom){
79 // There's room! Reuse it.
80 *pskb = skb;
81 } else if(writeable && has_tailroom){
82 // Tailroom, no headroom. Expand header the way GRE does.
83 struct sk_buff *new_skb = skb_realloc_headroom(skb, head_n + 16);
84 if(!new_skb){
85 err = -ENOMEM;
86 goto exit;
87 }
88 dev_kfree_skb(skb);
89 *pskb = new_skb;
90 } else {
91 // No room. Expand. There may be more efficient ways to do
92 // this, but this is simple and correct.
93 struct sk_buff *new_skb = skb_copy_expand(skb, head_n + 16, tail_n, GFP_ATOMIC);
94 if(!new_skb){
95 err = -ENOMEM;
96 goto exit;
97 }
98 dev_kfree_skb(skb);
99 *pskb = new_skb;
100 }
101 dprintf("> skb=%p headroom=%d head_n=%d tailroom=%d tail_n=%d\n",
102 *pskb,
103 skb_headroom(*pskb), head_n,
104 skb_tailroom(*pskb), tail_n);
105 exit:
106 dprintf("< err=%d\n", err);
107 return err;
108 }
110 /** Copy some data bits from a kernel buffer to an skb.
111 * Derived in the obvious way from skb_copy_bits().
112 */
113 int skb_put_bits(const struct sk_buff *skb, int offset, void *src, int len)
114 {
115 int i, copy;
116 int start = skb->len - skb->data_len;
118 if (offset > (int)skb->len-len)
119 goto fault;
121 /* Copy header. */
122 if ((copy = start-offset) > 0) {
123 if (copy > len)
124 copy = len;
125 memcpy(skb->data + offset, src, copy);
126 if ((len -= copy) == 0)
127 return 0;
128 offset += copy;
129 src += copy;
130 }
132 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
133 int end;
135 BUG_TRAP(start <= offset+len);
137 end = start + skb_shinfo(skb)->frags[i].size;
138 if ((copy = end-offset) > 0) {
139 u8 *vaddr;
141 if (copy > len)
142 copy = len;
144 vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
145 memcpy(vaddr + skb_shinfo(skb)->frags[i].page_offset + offset - start,
146 src,
147 copy);
148 kunmap_skb_frag(vaddr);
150 if ((len -= copy) == 0)
151 return 0;
152 offset += copy;
153 src += copy;
154 }
155 start = end;
156 }
158 if (skb_shinfo(skb)->frag_list) {
159 struct sk_buff *list;
161 for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
162 int end;
164 BUG_TRAP(start <= offset+len);
166 end = start + list->len;
167 if ((copy = end-offset) > 0) {
168 if (copy > len)
169 copy = len;
170 if (skb_put_bits(list, offset-start, src, copy))
171 goto fault;
172 if ((len -= copy) == 0)
173 return 0;
174 offset += copy;
175 src += copy;
176 }
177 start = end;
178 }
179 }
180 if (len == 0)
181 return 0;
183 fault:
184 return -EFAULT;
185 }
187 /** Add some space to the end of a (possibly fragmented) skb.
188 *
189 * Only works with Xen output skbs. Output skbs have 1 frag, and we
190 * add another frag for the extra space.
191 *
192 * @param skb skb
193 * @param n number of bytes to add
194 * @return 0 on success, error code otherwise
195 *
196 * @todo fixme
197 */
198 int pskb_put(struct sk_buff *skb, int n){
199 int err = 0;
200 if(1 || skb_is_nonlinear(skb)){
201 struct skb_shared_info *info = skb_shinfo(skb);
202 char *ptr = NULL;
204 if(info->nr_frags >= MAX_SKB_FRAGS){
205 err = -ENOMEM;
206 goto exit;
207 }
208 ptr = kmalloc(n, GFP_ATOMIC);
209 if(!ptr){
210 err = -ENOMEM;
211 goto exit;
212 }
213 info->nr_frags++;
214 info->frags[info->nr_frags - 1].page = virt_to_page(ptr);
215 info->frags[info->nr_frags - 1].page_offset = ((unsigned long)ptr & ~PAGE_MASK);
216 info->frags[info->nr_frags - 1].size = n;
218 skb->data_len += n;
219 skb->len += n;
220 } else {
221 __skb_put(skb, n);
222 }
223 exit:
224 if(err) dprintf("< err=%d\n", err);
225 return err;
226 }
228 /** Print some bits of an skb.
229 *
230 * @param skb to print
231 * @param offset byte offset to start printing at
232 * @param n number of bytes to print
233 */
234 void skb_print_bits(struct sk_buff *skb, int offset, int n){
235 int chunk = 16;
236 int i, k;
237 u8 buff[chunk];
238 if(!DEBUG_SKB) return;
239 while(n){
240 k = (n > chunk ? chunk : n);
241 skb_copy_bits(skb, offset, buff, k);
242 printk("%03d ", offset);
243 for(i=0; i<k; i++){
244 if(i == 8)printk(" ");
245 printk(":%02x", buff[i] & 0xff);
246 }
247 printk(" \n");
248 n -= k;
249 offset += k;
250 }
251 }
/** Hex-dump a buffer with printk.
 *
 * Every 16th byte starts a new line prefixed with its decimal index, an
 * extra space separates the two halves of a line, and the total byte
 * count is printed after the last byte.
 *
 * @param buf buffer to print
 * @param n number of bytes to print
 */
void buf_print(char *buf, int n){
    int pos = 0;

    while(pos < n){
        switch(pos % 16){
        case 0:
            printk("\n%04d ", pos);
            break;
        case 8:
            printk(" ");
            break;
        default:
            break;
        }
        printk(":%02x", buf[pos] & 0xff);
        pos++;
    }
    printk(" %04d\n", n);
}
268 /** Remove some space from the tail of an skb.
269 *
270 * @todo fixme: Do we need to handle frags?
271 */
272 void *skb_trim_tail(struct sk_buff *skb, int n){
273 skb->tail -= n;
274 skb->len -= n;
275 return skb->tail;
276 }
278 // #define BUG_TRAP(x)
279 // if(!(x)){ printk("KERNEL: assertion (" #x ") failed at " __FILE__ "(%d)\n", __LINE__); }
281 /** Convert a (possibly fragmented) skb into a scatter list.
282 *
283 * @param skb skb to convert
284 * @param sg scatterlist to set up
285 * @param sg_n size of sg on input, number of elements set on output
286 * @param offset offset into data to start at
287 * @param len number of bytes
288 * @return 0 on success, error code otherwise
289 */
290 int skb_scatterlist(struct sk_buff *skb, struct scatterlist *sg, int *sg_n,
291 int offset, int len){
292 int err = 0;
293 int start; // No. of bytes copied so far (where next copy starts).
294 int size; // Size of the next chunk.
295 int end; // Where the next chunk ends (start + size).
296 int copy; // Number of bytes to copy in one operation.
297 int sg_i = 0; // Index into sg.
298 int i;
300 if(DEBUG_SCATTERLIST){
301 dprintf("> offset=%d len=%d (end=%d), skb len=%d,\n",
302 offset, len, offset+len, skb->len);
303 }
304 start = 0;
305 size = skb_headlen(skb);
306 end = start + size;
307 copy = end - offset;
308 if(copy > 0){
309 char *p;
310 if(copy > len) copy = len;
311 if(sg_i >= *sg_n){
312 err = -EINVAL;
313 goto exit;
314 }
315 p = skb->data + offset;
316 SET_SCATTER_ADDR(sg[sg_i], NULL);
317 sg[sg_i].page = virt_to_page(p);
318 sg[sg_i].offset = ((unsigned long)p & ~PAGE_MASK);
319 sg[sg_i].length = copy;
320 if(DEBUG_SCATTERLIST){
321 dprintf("> sg_i=%d .page=%p .offset=%u .length=%d\n",
322 sg_i, sg[sg_i].page, sg[sg_i].offset, sg[sg_i].length);
323 }
324 sg_i++;
325 if((len -= copy) == 0) goto exit;
326 offset += copy;
327 }
328 start = end;
329 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++){
330 BUG_TRAP(start <= offset + len);
331 size = skb_shinfo(skb)->frags[i].size;
332 end = start + size;
333 copy = end - offset;
334 if(copy > 0){
335 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
336 if(copy > len) copy = len;
337 if(sg_i >= *sg_n){
338 err = -EINVAL;
339 goto exit;
340 }
341 SET_SCATTER_ADDR(sg[sg_i], NULL);
342 sg[sg_i].page = frag->page;
343 sg[sg_i].offset = frag->page_offset + offset - start;
344 sg[sg_i].length = copy;
345 if(DEBUG_SCATTERLIST){
346 dprintf("> sg_i=%d .page=%p .offset=%u .length=%d\n",
347 sg_i, sg[sg_i].page, sg[sg_i].offset, sg[sg_i].length);
348 }
349 sg_i++;
350 if((len -= copy) == 0) goto exit;
351 offset += copy;
352 }
353 start = end;
354 }
355 exit:
356 if(!err) *sg_n = sg_i;
357 if(len) wprintf("> len=%d\n", len);
358 if(len) BUG();
359 if(err) dprintf("< err=%d sg_n=%d\n", err, *sg_n);
360 return err;
361 }
363 struct arpheader
364 {
365 unsigned short ar_hrd; /* format of hardware address */
366 unsigned short ar_pro; /* format of protocol address */
367 unsigned char ar_hln; /* length of hardware address */
368 unsigned char ar_pln; /* length of protocol address */
369 unsigned short ar_op; /* ARP opcode (command) */
371 #if 1
372 /*
373 * Ethernet looks like this : This bit is variable sized however...
374 */
375 unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */
376 unsigned char ar_sip[4]; /* sender IP address */
377 unsigned char ar_tha[ETH_ALEN]; /* target hardware address */
378 unsigned char ar_tip[4]; /* target IP address */
379 #endif
381 };
383 void print_skb_data(char *msg, int count, struct sk_buff *skb, u8 *data, int len)
384 {
385 static int skb_count = 1000000;
386 u8 *ptr, *end;
387 u32 src_addr, dst_addr;
388 // Transport layer header.
389 union {
390 struct tcphdr *th;
391 struct udphdr *uh;
392 struct icmphdr *icmph;
393 struct igmphdr *igmph;
394 struct iphdr *ipiph;
395 unsigned char *raw;
396 } h;
397 // Network layer header.
398 union {
399 struct iphdr *iph;
400 struct ipv6hdr *ipv6h;
401 struct arpheader *arph;
402 struct ipxhdr *ipxh;
403 unsigned char *raw;
404 } nh;
405 // Link layer header.
406 union {
407 struct ethhdr *ethernet;
408 unsigned char *raw;
409 } mac;
410 int protocol;
411 if(!count) count = ++skb_count;
412 if(!msg) msg = (char *)__FUNCTION__;
413 if(!data){
414 printk("%s.%d> null data\n", msg, count);
415 return;
416 }
417 ptr = data;
418 end = data + len;
419 mac.raw = ptr;
420 ptr += sizeof(struct ethhdr);
421 if(ptr > end){ printk("***MAC:"); goto exit; }
422 protocol = ntohs(mac.ethernet->h_proto);
423 nh.raw = ptr;
425 printk("%s.%d> type=%d protocol=0x%x\n",
426 msg, count, skb->pkt_type, htons(skb->protocol));
427 if(1){
428 printk("%s.%d> %p mac src=" MACFMT " dst=" MACFMT "\n",
429 msg, count, data,
430 MAC6TUPLE(mac.ethernet->h_source),
431 MAC6TUPLE(mac.ethernet->h_dest));
432 }
434 switch(protocol){
435 case ETH_P_ARP:
436 ptr += sizeof(struct arpheader);
437 if(ptr > end){ printk("***ARP:"); goto exit; }
438 if(0){
439 printk("%s.%d> ARP hrd=%d, pro=%d, hln=%d, pln=%d, op=%d\n",
440 msg, count,
441 nh.arph->ar_hrd, nh.arph->ar_pro, nh.arph->ar_hln,
442 nh.arph->ar_pln, nh.arph->ar_op);
443 }
444 memcpy(&src_addr, nh.arph->ar_sip, 4);
445 src_addr = ntohl(src_addr);
446 memcpy(&dst_addr, nh.arph->ar_tip, 4);
447 dst_addr = ntohl(dst_addr);
448 printk("%s.%d> ARP HW src=" MACFMT " dst=" MACFMT "\n",
449 msg, count, MAC6TUPLE(nh.arph->ar_sha), MAC6TUPLE(nh.arph->ar_tha));
450 printk("%s.%d> ARP IP src=" IPFMT " dst=" IPFMT "\n",
451 msg, count, HIPQUAD(src_addr), HIPQUAD(dst_addr));
452 break;
453 case ETH_P_IP: {
454 u16 src_port, dst_port;
455 if(ptr + sizeof(struct iphdr) > end){ printk("***IP:"); goto exit; }
456 src_addr = ntohl(nh.iph->saddr);
457 dst_addr = ntohl(nh.iph->daddr);
458 if(1){
459 printk("%s.%d> IP proto=%d src=" IPFMT " dst=" IPFMT "\n",
460 msg, count, nh.iph->protocol,
461 HIPQUAD(src_addr), HIPQUAD(dst_addr));
462 printk("%s.%d> IP tot_len=%u len=%d\n",
463 msg, count, nh.iph->tot_len & 0xffff, len - ETH_HLEN);
464 }
465 ptr += (nh.iph->ihl * 4);
466 if(ptr > end){ printk ("***IP: len"); goto exit; }
467 h.raw = ptr;
468 switch(nh.iph->protocol){
469 case IPPROTO_TCP:
470 ptr += sizeof(struct tcphdr);
471 if(ptr > end){ printk("***TCP:"); goto exit; }
472 src_port = ntohs(h.th->source);
473 dst_port = ntohs(h.th->dest);
474 printk("%s.%d> TCP src=" IPFMT ":%u dst=" IPFMT ":%u\n",
475 msg, count,
476 HIPQUAD(src_addr), src_port,
477 HIPQUAD(dst_addr), dst_port);
478 break;
479 case IPPROTO_UDP:
480 ptr += sizeof(struct udphdr);
481 if(ptr > end){ printk("***UDP:"); goto exit; }
482 src_port = ntohs(h.uh->source);
483 dst_port = ntohs(h.uh->dest);
484 printk("%s.%d> UDP src=" IPFMT ":%u dst=" IPFMT ":%u\n",
485 msg, count,
486 HIPQUAD(src_addr), src_port,
487 HIPQUAD(dst_addr), dst_port);
488 break;
489 default:
490 printk("%s.%d> IP %d src=" IPFMT " dst=" IPFMT "\n",
491 msg, count,
492 nh.iph->protocol, HIPQUAD(src_addr), HIPQUAD(dst_addr));
493 break;
494 }
495 break; }
496 case ETH_P_IPV6:
497 printk("%s.%d> IPv6\n", msg, count);
498 break;
499 case ETH_P_IPX:
500 printk("%s.%d> IPX\n", msg, count);
501 break;
502 default:
503 printk("%s.%d> protocol=%d\n", msg, count, protocol);
504 break;
505 }
506 return;
507 exit:
508 printk("%s.%d> %s: skb problem\n", msg, count, __FUNCTION__);
509 printk("%s.%d> %s: data=%p end=%p(%d) ptr=%p(%d) eth=%d arp=%d ip=%d\n",
510 msg, count, __FUNCTION__,
511 data, end, end - data, ptr, ptr - data,
512 sizeof(struct ethhdr), sizeof(struct arphdr), sizeof(struct iphdr));
513 return;
514 }