ia64/xen-unstable

view tools/vnet/vnet-module/vnet_ioctl.c @ 8740:3d7ea7972b39

Update patches for linux 2.6.15.

Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Thu Feb 02 17:16:00 2006 +0000 (2006-02-02)
parents 06d84bf87159
children 71b0f00f6344
line source
1 /*
2 * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; either version 2 of the License, or (at your
7 * option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free software Foundation, Inc.,
16 * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19 #include <linux/config.h>
20 #include <linux/module.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/errno.h>
26 #include <asm/uaccess.h>
28 #include <linux/slab.h>
30 #include <linux/proc_fs.h>
31 #include <linux/string.h>
33 #include <linux/net.h>
34 #include <linux/in.h>
35 #include <linux/inet.h>
36 #include <linux/netdevice.h>
38 #include <sa.h>
39 #include "vif.h"
40 #include "vnet.h"
41 #include "varp.h"
42 #include "vnet_dev.h"
44 #include "sxpr_parser.h"
45 #include "iostream.h"
46 #include "kernel_stream.h"
47 #include "sys_string.h"
48 #include "sys_net.h"
50 #define MODULE_NAME "VNET"
51 #define DEBUG 1
52 #undef DEBUG
53 #include "debug.h"
55 // Functions to manage vnets.
56 /*
58 Have to rely on ethernet bridging being configured - but we can't rely
59 on the kernel interface being available to us (it's not exported @!$"%!).
61 Create a vnet N:
62 - create the vnet device vnifN: using commands to /proc, kernel api
63 - create the vnet bridge vnetN: using brctl in user-space
64 - for best results something should keep track of the mapping vnet id <-> bridge name
66 Add vif device vifD.N to vnet N.
67 - domain is configured with vifD.N on bridge vnetN
68 - vif script adds vif to bridge using brctl
69 - vif script detects that the bridge is a vnet bridge and
70 uses /proc commands to configure the mac on the vnet
72 Wouldn't be hard to add support for specifying vnet key(s) in
73 the control interface.
75 */
77 // id vnet id
78 // security security level
79 // ciphersuite: digest, cipher, keys??
80 /* Security policy.
81 vnet
82 src: mac
83 dst: mac
84 coa: ip
85 Map vnet x coa -> security (none, auth, conf)
87 Policy, e.g.
88 - same subnet x vnet
89 - diff subnet x vnet
90 - some subnet x vnet
91 - some host addr x vnet
93 (security (net local) (vnet *) (mode none))
94 (security (net (not local))
96 (security (addr, vnet) (local-subnet addr) none)
97 (security (addr, vnet) (not (local-subnet addr)) conf)
98 (security (addr, vnet) (host 15.144.27.80)
99 (security (addr, vnet) (subnet addr 15.144.24.0/24) auth)
100 (security (addr, vnet) t auth)
102 (security (addr local) (mode none))
103 (security (addr local/16) (mode none))
104 (security (addr 15.144.0.0/16) (mode auth))
105 (security (addr 15.0.0.0/8) (mode conf))
106 (security (addr *) (mode drop))
108 ?Varp security
109 Use esp too - none, auth, conf,
110 Varp sends broadcasts (requests) and unicasts (replies).
111 Uses UDP. Could send over ESP if needed.
112 For bcast don't know where it goes, so security has to be by vnet.
113 For ucast know where it goes, so could do by vnet and addr.
115 Similar issue for vnets: know where unicast goes but don't know where
116 bcast goes.
118 Simplify: 2 levels
119 local ucast
120 nonlocal ucast, mcast
122 (security (local none) (nonlocal conf))
123 (security (local auth) (nonlocal conf))
125 VARP security matches vnet security.
127 */
129 /** @file
130 *
131 * Kernel interface to files in /proc.
132 */
/** Filesystem prefix that may lead a proc path. */
#define PROC_ROOT "/proc/"
/** Length of PROC_ROOT excluding the terminating NUL.
 * Derived from the literal so the two can never disagree.
 */
#define PROC_ROOT_LEN ((int)(sizeof(PROC_ROOT) - 1))
/** Full path of this module's proc directory. */
#define MODULE_ROOT PROC_ROOT "vnet"
138 enum {
139 VNET_POLICY = 1,
140 };
142 typedef struct proc_dir_entry ProcEntry;
143 typedef struct inode Inode;
144 typedef struct file File;
146 static int proc_open_fn(struct inode *inode, File *file);
147 static ssize_t proc_read_fn(File *file, char *buffer, size_t count, loff_t *offset);
148 static ssize_t proc_write_fn(File *file, const char *buffer, size_t count, loff_t *offset) ;
149 //static int proc_flush_fn(File *file);
150 static loff_t proc_lseek_fn(File * file, loff_t offset, int orig);
151 static int proc_ioctl_fn(struct inode *inode, File *file, unsigned opcode, unsigned long arg);
152 static int proc_release_fn(struct inode *inode, File *file);
154 static int eval(Sxpr exp);
156 static int ProcEntry_has_name(ProcEntry *entry, const char *name, int namelen){
157 dprintf("> name=%.*s entry=%.*s\n", namelen, name, entry->namelen, entry->name);
158 if(!entry || !entry->low_ino) return FALSE;
159 if(entry->namelen != namelen) return FALSE;
160 return memcmp(name, entry->name, namelen) == 0;
161 }
163 // Set f->f_error on error?
164 // Does interface stop r/w on first error?
165 // Is release called after an error?
166 //
168 static struct file_operations proc_file_ops = {
169 //owner: THIS_MODULE,
170 open: proc_open_fn,
171 read: proc_read_fn,
172 write: proc_write_fn,
173 //flush: proc_flush_fn,
174 llseek: proc_lseek_fn,
175 ioctl: proc_ioctl_fn,
176 release: proc_release_fn,
177 };
179 static int proc_get_parser(File *file, Parser **val){
180 int err = 0;
181 Parser *parser = NULL;
182 parser = file->private_data;
183 if(!parser){
184 parser = Parser_new();
185 if(!parser){
186 err = -ENOMEM;
187 goto exit;
188 }
189 file->private_data = parser;
190 }
191 exit:
192 *val = parser;
193 return err;
194 }
196 static int proc_open_fn(Inode *inode, File *file){
197 // User open.
198 // Return errcode or 0 on success.
199 // Can stuff data in file->private_data (void*).
200 // Get entry from
201 //ProcEntry *entry = (ProcEntry *)inode->u.generic_ip;
202 //file->private_data = NULL;
203 // Check for user privilege - deny otherwise.
204 // -EACCESS
205 int err = 0;
206 dprintf(">\n");
207 file->private_data = NULL;
208 return err;
209 }
211 static ssize_t proc_read_fn(File *file, char *buffer,
212 size_t count, loff_t *offset){
213 // User read.
214 // Copy data to user buffer, increment offset by count, return count.
215 dprintf(">\n");
216 count = 0;
217 //if(copy_to_user(buffer, data, count)){
218 // return -EFAULT;
219 //}
220 //*offset += count;
221 return count;
222 }
224 static ssize_t proc_write_fn(File *file, const char *buffer,
225 size_t count, loff_t *offset) {
226 // User write.
227 // Copy data into kernel space from buffer.
228 // Increment offset by count, return count (or code).
229 int err = 0;
230 char *data = NULL;
231 Parser *parser = NULL;
233 //dprintf("> count=%d\n", count);
234 err = proc_get_parser(file, &parser);
235 if(err) goto exit;
236 data = allocate(count);
237 if(!data){
238 err = -ENOMEM;
239 goto exit;
240 }
241 err = copy_from_user(data, buffer, count);
242 if(err) goto exit;
243 *offset += count;
244 err = Parser_input(parser, data, count);
245 exit:
246 deallocate(data);
247 err = (err < 0 ? err : count);
248 //dprintf("< err = %d\n", err);
249 return err;
250 }
#if 0
/** Flush handler (disabled sketch, not compiled).
 *
 * Detects the case of a file opened write-only whose reference count is 1
 * and fetches its proc entry; the action to take is not written yet.
 *
 * @param file file being flushed
 * @return 0
 */
static int proc_flush_fn(File *file){
    // User flush.
    int retval = 0; // Was returned without ever being declared.
    int writing = (file->f_flags & O_ACCMODE) == O_WRONLY;
    int f_count = atomic_read(&file->f_count);
    if (writing && f_count == 1) {
        ProcEntry *pentry = (ProcEntry *)file->f_dentry->d_inode->u.generic_ip;
        // ...
    }
    return retval;
}
#endif
/* Seek origins, defined here only when no header has provided them. */
#ifndef SEEK_SET
enum {
    SEEK_SET = 0,   /**< Offset from start. */
    SEEK_CUR = 1,   /**< Offset from current position. */
    SEEK_END = 2    /**< Offset from size of file. */
};
#endif /* !SEEK_SET */
276 static loff_t proc_lseek_fn(File * file, loff_t offset, int from){
277 // User lseek.
278 dprintf(">\n");
279 switch(from){
280 case SEEK_SET:
281 break;
282 case SEEK_CUR:
283 offset += file->f_pos;
284 break;
285 case SEEK_END:
286 return -EINVAL;
287 default:
288 return -EINVAL;
289 }
290 if(offset < 0) return -EINVAL;
291 file->f_pos = offset;
292 return offset;
293 }
295 static int proc_ioctl_fn(Inode *inode, File *file,
296 unsigned opcode, unsigned long arg){
297 // User ioctl.
298 dprintf(">\n");
299 return 0;
300 }
302 static int proc_release_fn(Inode *inode, File *file){
303 // User close.
304 // Cleanup file->private_data, return errcode.
305 int err = 0;
306 Parser *parser = NULL;
307 Sxpr obj, l;
309 dprintf(">\n");
310 err = proc_get_parser(file, &parser);
311 if(err) goto exit;
312 err = Parser_input(parser, NULL, 0);
313 if(err) goto exit;
314 obj = parser->val;
315 for(l = obj; CONSP(l); l = CDR(l)){
316 err = eval(CAR(l));
317 if(err) break;
318 }
319 exit:
320 Parser_free(parser);
321 file->private_data = NULL;
322 dprintf("< err=%d\n", err);
323 return err;
324 }
/** Root proc entry; ProcFS_lookup() starts here when given no other directory. */
static ProcEntry *proc_fs_root = &proc_root;
328 static int proc_path_init(const char *path, const char **rest){
329 int err = 0;
331 if(!path){
332 err = -EINVAL;
333 goto exit;
334 }
335 if(*path == '/'){
336 if(strncmp(PROC_ROOT, path, PROC_ROOT_LEN)){
337 err = -EINVAL;
338 } else {
339 path += PROC_ROOT_LEN;
340 }
341 }
342 exit:
343 *rest = path;
344 return err;
345 }
348 /** Parse a path relative to `dir'. If dir is null or the proc root
349 * the path is relative to "/proc/", and the leading "/proc/" may be
350 * supplied.
351 *
352 */
353 static ProcEntry * ProcFS_lookup(const char *path, ProcEntry *dir){
354 const char *pathptr = path, *next = NULL;
355 ProcEntry *entry, *result = NULL;
356 int pathlen;
358 if(dir && (dir != proc_fs_root)){
359 entry = dir;
360 } else {
361 if(proc_path_init(path, &pathptr)) goto exit;
362 entry = proc_fs_root;
363 }
364 if(!pathptr || !*pathptr) goto exit;
365 while(1){
366 next = strchr(pathptr, '/');
367 pathlen = (next ? next - pathptr : strlen(pathptr));
368 for(entry = entry->subdir; entry ; entry = entry->next) {
369 if(ProcEntry_has_name(entry, pathptr, pathlen)) break;
370 }
371 if (!entry) break;
372 if(!next){
373 result = entry;
374 break;
375 }
376 pathptr = next + 1;
377 }
378 exit:
379 return result;
380 }
382 static ProcEntry *ProcFS_register(const char *name, ProcEntry *dir, int val){
383 mode_t mode = 0;
384 ProcEntry *entry;
386 entry = create_proc_entry(name, mode, dir);
387 if(entry){
388 entry->proc_fops = &proc_file_ops;
389 entry->data = (void*)val; // Whatever data we need.
390 }
391 return entry;
392 }
394 static ProcEntry *ProcFS_mkdir(const char *name, ProcEntry *parent){
395 ProcEntry *entry = NULL;
396 entry = ProcFS_lookup(name, parent);
397 if(!entry){
398 const char *path;
399 if(proc_path_init(name, &path)) goto exit;
400 entry = proc_mkdir(path, parent);
401 }
402 exit:
403 return entry;
404 }
/** Remove a single proc entry.
 *
 * Thin wrapper around remove_proc_entry().
 *
 * @param name name of the entry to remove
 * @param parent directory containing the entry
 */
static void ProcFS_remove(const char *name, ProcEntry *parent){
    remove_proc_entry(name, parent);
}
410 static void ProcFS_rmrec_entry(ProcEntry *entry){
411 if(entry){
412 // Don't want to remove /proc itself!
413 if(entry->parent == entry) return;
414 while(entry->subdir){
415 ProcFS_rmrec_entry(entry->subdir);
416 }
417 dprintf("> remove %s\n", entry->name);
418 ProcFS_remove(entry->name, entry->parent);
419 }
420 }
422 static void ProcFS_rmrec(const char *name, ProcEntry *parent){
423 ProcEntry *entry;
425 dprintf("> name=%s\n", name);
426 entry = ProcFS_lookup(name, parent);
427 if(entry){
428 ProcFS_rmrec_entry(entry);
429 }
430 dprintf("<\n");
431 }
433 static int stringof(Sxpr exp, char **s){
434 int err = 0;
435 if(ATOMP(exp)){
436 *s = atom_name(exp);
437 } else if(STRINGP(exp)){
438 *s = string_string(exp);
439 } else {
440 err = -EINVAL;
441 *s = NULL;
442 }
443 return err;
444 }
446 static int child_string(Sxpr exp, Sxpr key, char **s){
447 int err = 0;
448 Sxpr val = sxpr_child_value(exp, key, ONONE);
449 err = stringof(val, s);
450 return err;
451 }
#if 0
/* Disabled integer helpers, kept for reference; not compiled. */

/** Convert an expression to an int.
 *
 * Integer objects are used directly; anything else is converted to a
 * string and parsed with convert_atoul().
 * NOTE(review): *v is assigned even when convert_atoul() reports an error.
 *
 * @param exp expression to convert
 * @param v return parameter for the value
 * @return 0 on success, error code otherwise
 */
static int intof(Sxpr exp, int *v){
    int err = 0;
    char *s;
    unsigned long l;
    if(INTP(exp)){
        *v = OBJ_INT(exp);
    } else {
        err = stringof(exp, &s);
        if(err) goto exit;
        err = convert_atoul(s, &l);
        *v = (int)l;
    }
  exit:
    return err;
}

/** Get the value of a child element as an int.
 *
 * @param exp expression to search
 * @param key name of the child
 * @param v return parameter for the value
 * @return 0 on success, error code otherwise
 */
static int child_int(Sxpr exp, Sxpr key, int *v){
    int err = 0;
    Sxpr val = sxpr_child_value(exp, key, ONONE);
    err = intof(val, v);
    return err;
}
#endif
478 static int vnetof(Sxpr exp, VnetId *v){
479 int err = 0;
480 char *s;
481 err = stringof(exp, &s);
482 if(err) goto exit;
483 err = VnetId_aton(s, v);
484 exit:
485 return err;
486 }
488 static int child_vnet(Sxpr exp, Sxpr key, VnetId *v){
489 int err = 0;
490 Sxpr val = sxpr_child_value(exp, key, ONONE);
491 err = vnetof(val, v);
492 return err;
493 }
495 static int macof(Sxpr exp, unsigned char *v){
496 int err = 0;
497 char *s;
498 err = stringof(exp, &s);
499 if(err) goto exit;
500 err = mac_aton(s, v);
501 exit:
502 return err;
503 }
505 static int child_mac(Sxpr exp, Sxpr key, unsigned char *v){
506 int err = 0;
507 Sxpr val = sxpr_child_value(exp, key, ONONE);
508 err = macof(val, v);
509 return err;
510 }
512 static int addrof(Sxpr exp, uint32_t *v){
513 int err = 0;
514 char *s;
515 unsigned long w;
516 err = stringof(exp, &s);
517 if(err) goto exit;
518 err = get_inet_addr(s, &w);
519 if(err) goto exit;
520 *v = (uint32_t)w;
521 exit:
522 return err;
523 }
525 static int child_addr(Sxpr exp, Sxpr key, uint32_t *v){
526 int err = 0;
527 Sxpr val = sxpr_child_value(exp, key, ONONE);
528 err = addrof(val, v);
529 return err;
530 }
532 /** Create a vnet.
533 * It is an error if a vnet with the same id exists.
534 *
535 * @param vnet vnet id
536 * @param device vnet device name
537 * @param security security level
538 * @return 0 on success, error code otherwise
539 */
540 static int ctrl_vnet_add(VnetId *vnet, char *device, int security){
541 int err = 0;
542 Vnet *vnetinfo = NULL;
544 if(strlen(device) >= IFNAMSIZ){
545 err = -EINVAL;
546 goto exit;
547 }
548 if(Vnet_lookup(vnet, &vnetinfo) == 0){
549 err = -EEXIST;
550 goto exit;
551 }
552 err = Vnet_alloc(&vnetinfo);
553 if(err) goto exit;
554 vnetinfo->vnet = *vnet;
555 vnetinfo->security = security;
556 strcpy(vnetinfo->device, device);
557 err = Vnet_create(vnetinfo);
558 exit:
559 if(vnetinfo) Vnet_decref(vnetinfo);
560 return err;
561 }
563 /** Delete a vnet.
564 *
565 * @param vnet vnet id
566 * @return 0 on success, error code otherwise
567 */
568 static int ctrl_vnet_del(VnetId *vnet){
569 int err = -ENOSYS;
570 // Can't delete if there are any vifs on the vnet.
572 // Need to flush vif entries for the deleted vnet.
573 // Need to flush varp entries for the deleted vnet.
574 // Note that (un)register_netdev() hold rtnl_lock() around
575 // (un)register_netdevice().
577 //Vnet_del(vnet);
578 return err;
579 }
581 /** Create an entry for a vif with the given vnet and vmac.
582 *
583 * @param vnet vnet id
584 * @param vmac mac address
585 * @return 0 on success, error code otherwise
586 */
587 static int ctrl_vif_add(VnetId *vnet, Vmac *vmac){
588 int err = 0;
589 Vnet *vnetinfo = NULL;
590 Vif *vif = NULL;
592 dprintf(">\n");
593 err = Vnet_lookup(vnet, &vnetinfo);
594 if(err) goto exit;
595 err = vif_create(vnet, vmac, &vif);
596 exit:
597 if(vnetinfo) Vnet_decref(vnetinfo);
598 if(vif) vif_decref(vif);
599 dprintf("< err=%d\n", err);
600 return err;
601 }
603 /** Delete a vif.
604 *
605 * @param vnet vnet id
606 * @param vmac mac address
607 * @return 0 on success, error code otherwise
608 */
609 static int ctrl_vif_del(VnetId *vnet, Vmac *vmac){
610 int err = 0;
611 Vnet *vnetinfo = NULL;
612 Vif *vif = NULL;
614 dprintf(">\n");
615 err = Vnet_lookup(vnet, &vnetinfo);
616 if(err) goto exit;
617 err = vif_lookup(vnet, vmac, &vif);
618 if(err) goto exit;
619 vif_remove(vnet, vmac);
620 exit:
621 if(vnetinfo) Vnet_decref(vnetinfo);
622 if(vif) vif_decref(vif);
623 dprintf("< err=%d\n", err);
624 return err;
625 }
627 /** (varp.print)
628 */
629 static int eval_varp_print(Sxpr exp){
630 int err = 0;
631 varp_print();
632 return err;
633 }
635 /** (varp.mcaddr (addr <addr>))
636 */
637 static int eval_varp_mcaddr(Sxpr exp){
638 int err =0;
639 Sxpr oaddr = intern("addr");
640 uint32_t addr;
642 err = child_addr(exp, oaddr, &addr);
643 if(err < 0) goto exit;
644 varp_set_mcast_addr(addr);
645 exit:
646 return err;
647 }
649 /** (varp.flush)
650 */
651 static int eval_varp_flush(Sxpr exp){
652 int err = 0;
653 varp_flush();
654 return err;
655 }
657 /** (vnet.add (id <id>)
658 * [(vnetif <name>)]
659 * [(security { none | auth | conf } )]
660 * )
661 */
662 static int eval_vnet_add(Sxpr exp){
663 int err = 0;
664 Sxpr oid = intern("id");
665 Sxpr osecurity = intern("security");
666 Sxpr ovnetif = intern("vnetif");
667 Sxpr csecurity;
668 VnetId vnet = {};
669 char *device = NULL;
670 char dev[IFNAMSIZ] = {};
671 char *security = NULL;
672 int sec;
674 err = child_vnet(exp, oid, &vnet);
675 if(err) goto exit;
676 child_string(exp, ovnetif, &device);
677 if(!device){
678 snprintf(dev, IFNAMSIZ-1, "vnif%04x", ntohs(vnet.u.vnet16[7]));
679 device = dev;
680 }
681 csecurity = sxpr_child_value(exp, osecurity, intern("none"));
682 err = stringof(csecurity, &security);
683 if(err) goto exit;
684 if(strcmp(security, "none")==0){
685 sec = 0;
686 } else if(strcmp(security, "auth")==0){
687 sec = SA_AUTH;
688 } else if(strcmp(security, "conf")==0){
689 sec = SA_CONF;
690 } else {
691 err = -EINVAL;
692 goto exit;
693 }
694 err = ctrl_vnet_add(&vnet, device, sec);
695 exit:
696 dprintf("< err=%d\n", err);
697 return err;
698 }
700 /** Delete a vnet.
701 *
702 * (vnet.del (id <id>))
703 *
704 * @param vnet vnet id
705 * @return 0 on success, error code otherwise
706 */
707 static int eval_vnet_del(Sxpr exp){
708 int err = 0;
709 Sxpr oid = intern("id");
710 VnetId vnet = {};
712 err = child_vnet(exp, oid, &vnet);
713 if(err) goto exit;
714 err = ctrl_vnet_del(&vnet);
715 exit:
716 return err;
717 }
719 /** (vif.add (vnet <vnet>) (vmac <macaddr>))
720 */
721 static int eval_vif_add(Sxpr exp){
722 int err = 0;
723 Sxpr ovnet = intern("vnet");
724 Sxpr ovmac = intern("vmac");
725 VnetId vnet = {};
726 Vmac vmac = {};
728 err = child_vnet(exp, ovnet, &vnet);
729 if(err) goto exit;
730 err = child_mac(exp, ovmac, vmac.mac);
731 if(err) goto exit;
732 err = ctrl_vif_add(&vnet, &vmac);
733 exit:
734 return err;
735 }
737 /** (vif.del (vnet <vnet>) (vmac <macaddr>))
738 */
739 static int eval_vif_del(Sxpr exp){
740 int err = 0;
741 Sxpr ovnet = intern("vnet");
742 Sxpr ovmac = intern("vmac");
743 VnetId vnet = {};
744 Vmac vmac = {};
746 err = child_vnet(exp, ovnet, &vnet);
747 if(err) goto exit;
748 err = child_mac(exp, ovmac, vmac.mac);
749 if(err) goto exit;
750 err = ctrl_vif_del(&vnet, &vmac);
751 exit:
752 return err;
753 }
/** Entry in the command dispatch table used by eval(). */
typedef struct SxprEval {
    /** Command name an expression is matched against. */
    Sxpr elt;
    /** Handler called with the whole expression when it matches. */
    int (*fn)(Sxpr);
} SxprEval;
760 static int eval(Sxpr exp){
761 int err = 0;
762 SxprEval defs[] = {
763 { intern("varp.print"), eval_varp_print },
764 { intern("varp.mcaddr"), eval_varp_mcaddr },
765 { intern("varp.flush"), eval_varp_flush },
766 { intern("vif.add"), eval_vif_add },
767 { intern("vif.del"), eval_vif_del },
768 { intern("vnet.add"), eval_vnet_add },
769 { intern("vnet.del"), eval_vnet_del },
770 { ONONE, NULL } };
771 SxprEval *def;
773 iprintf("> "); objprint(iostdout, exp, 0); IOStream_print(iostdout, "\n");
774 err = -ENOSYS;
775 for(def = defs; !NONEP(def->elt); def++){
776 if(sxpr_elementp(exp, def->elt)){
777 err = def->fn(exp);
778 break;
779 }
780 }
781 iprintf("< err=%d\n", err);
782 return err;
783 }
785 void __init ProcFS_init(void){
786 ProcEntry *root_entry;
787 ProcEntry *policy_entry;
789 dprintf(">\n");
790 root_entry = ProcFS_mkdir(MODULE_ROOT, NULL);
791 if(!root_entry) goto exit;
792 policy_entry = ProcFS_register("policy", root_entry, VNET_POLICY);
793 exit:
794 dprintf("<\n");
795 }
/** Remove the module's proc interface.
 *
 * Recursively removes MODULE_ROOT and everything beneath it.
 */
void __exit ProcFS_exit(void){
    dprintf(">\n");
    ProcFS_rmrec(MODULE_ROOT, NULL);
    dprintf("<\n");
}