ia64/xen-unstable

changeset 165:1b51468b4a9c

bitkeeper revision 1.34 (3e4cfce1Ot4YJzJ4jwNjDthlFrZV5w)

conflict resolution.
author akw27@boulderdash.cl.cam.ac.uk
date Fri Feb 14 14:27:45 2003 +0000 (2003-02-14)
parents 53760d4155cf 2a4e70540c0d
children f5723f60515a
files .rootkeys BitKeeper/etc/logging_ok xen-2.4.16/arch/i386/ioremap.c xen-2.4.16/common/domain.c xen-2.4.16/common/event.c xen-2.4.16/common/kernel.c xen-2.4.16/common/memory.c xen-2.4.16/common/network.c xen-2.4.16/drivers/net/3c509.c xen-2.4.16/drivers/net/3c59x.c xen-2.4.16/drivers/net/8139cp.c xen-2.4.16/drivers/net/8139too.c xen-2.4.16/drivers/net/eepro100.c xen-2.4.16/drivers/net/pcnet32.c xen-2.4.16/drivers/net/tg3.c xen-2.4.16/drivers/net/tg3.h xen-2.4.16/drivers/net/tulip/interrupt.c xen-2.4.16/drivers/net/tulip/tulip_core.c xen-2.4.16/include/asm-i386/flushtlb.h xen-2.4.16/include/asm-i386/page.h xen-2.4.16/include/asm-i386/pci.h xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h xen-2.4.16/include/hypervisor-ifs/network.h xen-2.4.16/include/xeno/mm.h xen-2.4.16/include/xeno/skbuff.h xen-2.4.16/include/xeno/vif.h xen-2.4.16/net/dev.c xen-2.4.16/net/eth.c xen-2.4.16/net/skbuff.c xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c xenolinux-2.4.16-sparse/include/asm-xeno/io.h xenolinux-2.4.16-sparse/include/linux/skbuff.h xenolinux-2.4.16-sparse/net/core/skbuff.c
line diff
     1.1 --- a/.rootkeys	Fri Feb 14 13:18:19 2003 +0000
     1.2 +++ b/.rootkeys	Fri Feb 14 14:27:45 2003 +0000
     1.3 @@ -77,10 +77,8 @@ 3e4a8d40XMqvT05EwZwJg1HMsFDUBA xen-2.4.1
     1.4  3ddb79bdkDY1bSOYkToP1Cc49VdBxg xen-2.4.16/drivers/ide/ide.c
     1.5  3ddb79bdPyAvT_WZTAFhaX0jp-yXSw xen-2.4.16/drivers/ide/ide_modes.h
     1.6  3e4a8d401aSwOzCScQXR3lsmNlAwUQ xen-2.4.16/drivers/ide/piix.c
     1.7 -3ddb79bfogeJNHTIepPjd8fy1TyoTw xen-2.4.16/drivers/net/3c509.c
     1.8  3ddb79bfMlOcWUwjtg6oMYhGySHDDw xen-2.4.16/drivers/net/3c59x.c
     1.9  3ddb79bfl_DWxZQFKiJ2BXrSedV4lg xen-2.4.16/drivers/net/8139cp.c
    1.10 -3ddb79bfLVGtyXNJS4NQg-lP21rndA xen-2.4.16/drivers/net/8139too.c
    1.11  3ddb79c0tWiE8xIFHszxipeVCGKTSA xen-2.4.16/drivers/net/Makefile
    1.12  3ddb79bfU-H1Hms4BuJEPPydjXUEaQ xen-2.4.16/drivers/net/Space.c
    1.13  3e4540ccS4bfbx9rLiLElP0F1OVwZA xen-2.4.16/drivers/net/e1000/LICENSE
    1.14 @@ -160,6 +158,7 @@ 3ddb79c34BFiXjBJ_cCKB0aCsV1IDw xen-2.4.1
    1.15  3e20b82fl1jmQiKdLy7fxMcutfpjWA xen-2.4.16/include/asm-i386/domain_page.h
    1.16  3ddb79c2O729EttZTYu1c8LcsUO_GQ xen-2.4.16/include/asm-i386/elf.h
    1.17  3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen-2.4.16/include/asm-i386/fixmap.h
    1.18 +3e2d29944GI24gf7vOP_7x8EyuqxeA xen-2.4.16/include/asm-i386/flushtlb.h
    1.19  3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen-2.4.16/include/asm-i386/hardirq.h
    1.20  3ddb79c3BFEIwXR4IsWbwp4BoL4DkA xen-2.4.16/include/asm-i386/hdreg.h
    1.21  3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen-2.4.16/include/asm-i386/i387.h
    1.22 @@ -442,6 +441,7 @@ 3ddb79bbqhb9X9qWOz5Bv4wOzrkITg xenolinux
    1.23  3ddb79bbA52x94o6uwDYsbzrH2hjzA xenolinux-2.4.16-sparse/include/asm-xeno/xor.h
    1.24  3e4a8cb7ON8EclY3NN3YPXyMT941hA xenolinux-2.4.16-sparse/include/linux/blk.h
    1.25  3e4a8cb7GJrKD0z7EF0VZOhdEa01Mw xenolinux-2.4.16-sparse/include/linux/major.h
    1.26 +3e37c39fVCSGQENtY6g7muaq_THliw xenolinux-2.4.16-sparse/include/linux/skbuff.h
    1.27  3ddb79bb_7YG4U75ZmEic9YXWTW7Vw xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h
    1.28  3e4a8cb7j05wwb1uPZgY16s68o7qAw xenolinux-2.4.16-sparse/init/main.c
    1.29  3ddb79bcxkVPfWlZ1PQKvDrfArzOVw xenolinux-2.4.16-sparse/kernel/panic.c
    1.30 @@ -449,3 +449,4 @@ 3ddb79bbP31im-mx2NbfthSeqty1Dg xenolinux
    1.31  3e15d52e0_j129JPvo7xfYGndVFpwQ xenolinux-2.4.16-sparse/mm/memory.c
    1.32  3e15d535DLvpzTrLRUIerB69LpJD1g xenolinux-2.4.16-sparse/mm/mremap.c
    1.33  3e15d531m1Y1_W8ki64AFOU_ua4C4w xenolinux-2.4.16-sparse/mm/swapfile.c
    1.34 +3e37c312QFuzIxXsuAgO6IRt3Tp96Q xenolinux-2.4.16-sparse/net/core/skbuff.c
     2.1 --- a/BitKeeper/etc/logging_ok	Fri Feb 14 13:18:19 2003 +0000
     2.2 +++ b/BitKeeper/etc/logging_ok	Fri Feb 14 14:27:45 2003 +0000
     2.3 @@ -1,5 +1,6 @@
     2.4  akw27@boulderdash.cl.cam.ac.uk
     2.5  akw27@labyrinth.cl.cam.ac.uk
     2.6 +akw27@plucky.localdomain
     2.7  bd240@boulderdash.cl.cam.ac.uk
     2.8  iap10@labyrinth.cl.cam.ac.uk
     2.9  kaf24@labyrinth.cl.cam.ac.uk
     4.1 --- a/xen-2.4.16/common/domain.c	Fri Feb 14 13:18:19 2003 +0000
     4.2 +++ b/xen-2.4.16/common/domain.c	Fri Feb 14 14:27:45 2003 +0000
     4.3 @@ -12,6 +12,7 @@
     4.4  #include <xeno/dom0_ops.h>
     4.5  #include <asm/io.h>
     4.6  #include <asm/domain_page.h>
     4.7 +#include <asm/flushtlb.h>
     4.8  #include <asm/msr.h>
     4.9  #include <xeno/multiboot.h>
    4.10  
    4.11 @@ -175,10 +176,13 @@ unsigned int alloc_new_dom_mem(struct ta
    4.12      struct pfn_info *pf;
    4.13      unsigned int alloc_pfns;
    4.14      unsigned int req_pages;
    4.15 +    unsigned long flags;
    4.16  
    4.17      /* how many pages do we need to alloc? */
    4.18      req_pages = kbytes >> (PAGE_SHIFT - 10);
    4.19  
    4.20 +    spin_lock_irqsave(&free_list_lock, flags);
    4.21 +    
    4.22      /* is there enough mem to serve the request? */   
    4.23      if ( req_pages > free_pfns ) return -1;
    4.24      
    4.25 @@ -194,6 +198,8 @@ unsigned int alloc_new_dom_mem(struct ta
    4.26          list_add_tail(&pf->list, &p->pg_head);
    4.27          free_pfns--;
    4.28      }
    4.29 +   
    4.30 +    spin_unlock_irqrestore(&free_list_lock, flags);
    4.31      
    4.32      p->tot_pages = req_pages;
    4.33  
    4.34 @@ -350,6 +356,7 @@ static unsigned long alloc_page_from_dom
    4.35   */
    4.36  int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
    4.37  {
    4.38 +
    4.39      struct list_head *list_ent;
    4.40      char *src, *dst;
    4.41      int i, dom = p->domain;
    4.42 @@ -517,8 +524,7 @@ int setup_guestos(struct task_struct *p,
    4.43  
    4.44      /* Install the new page tables. */
    4.45      __cli();
    4.46 -    __asm__ __volatile__ (
    4.47 -        "mov %%eax,%%cr3" : : "a" (pagetable_val(p->mm.pagetable)));
    4.48 +    __write_cr3_counted(pagetable_val(p->mm.pagetable));
    4.49  
    4.50      /* Copy the guest OS image. */
    4.51      src = (char *)__va(mod[0].mod_start + 12);
    4.52 @@ -594,8 +600,7 @@ int setup_guestos(struct task_struct *p,
    4.53      }
    4.54  
    4.55      /* Reinstate the caller's page tables. */
    4.56 -    __asm__ __volatile__ (
    4.57 -        "mov %%eax,%%cr3" : : "a" (pagetable_val(current->mm.pagetable)));    
    4.58 +    __write_cr3_counted(pagetable_val(current->mm.pagetable));
    4.59      __sti();
    4.60  
    4.61      new_thread(p, 
     5.1 --- a/xen-2.4.16/common/event.c	Fri Feb 14 13:18:19 2003 +0000
     5.2 +++ b/xen-2.4.16/common/event.c	Fri Feb 14 14:27:45 2003 +0000
     5.3 @@ -14,14 +14,14 @@
     5.4  typedef void (*hyp_event_callback_fn_t)(void);
     5.5  
     5.6  extern void schedule(void);
     5.7 -extern void flush_rx_queue(void);
     5.8  extern void flush_blk_queue(void);
     5.9 +extern void update_shared_ring(void);
    5.10  
    5.11  /* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */
    5.12  static hyp_event_callback_fn_t event_call_fn[] = 
    5.13  {
    5.14      schedule,
    5.15 -    flush_rx_queue,
    5.16 +    update_shared_ring,
    5.17      kill_domain, 
    5.18      flush_blk_queue
    5.19  };
     6.1 --- a/xen-2.4.16/common/kernel.c	Fri Feb 14 13:18:19 2003 +0000
     6.2 +++ b/xen-2.4.16/common/kernel.c	Fri Feb 14 14:27:45 2003 +0000
     6.3 @@ -14,6 +14,7 @@
     6.4  #include <xeno/dom0_ops.h>
     6.5  #include <asm/byteorder.h>
     6.6  #include <linux/if_ether.h>
     6.7 +#include <asm/domain_page.h>
     6.8  
     6.9  /* VGA text definitions. */
    6.10  #define COLUMNS	    80
    6.11 @@ -301,11 +302,11 @@ void panic(const char *fmt, ...)
    6.12      char buf[1024], *p;
    6.13      unsigned long flags;
    6.14      extern void machine_restart(char *);
    6.15 -
    6.16 +    
    6.17      va_start(args, fmt);
    6.18      (void)vsnprintf(buf, sizeof(buf), fmt, args);
    6.19      va_end(args);
    6.20 -  
    6.21 +    
    6.22      /* Spit out multiline message in one go. */
    6.23      spin_lock_irqsave(&console_lock, flags);
    6.24      __putstr("\n****************************************\n");
    6.25 @@ -357,6 +358,7 @@ unsigned short compute_cksum(unsigned sh
    6.26  /* XXX SMH: below is rather vile; pulled in to allow network console */
    6.27  
    6.28  extern int netif_rx(struct sk_buff *); 
    6.29 +extern struct net_device *the_dev;
    6.30  
    6.31  typedef struct my_udphdr {
    6.32      __u16 source;
    6.33 @@ -407,17 +409,24 @@ int console_export(char *str, int len)
    6.34      struct my_udphdr *udph = NULL; 
    6.35      struct my_ethhdr *ethh = NULL; 
    6.36      int hdr_size = sizeof(struct my_iphdr) + sizeof(struct my_udphdr); 
    6.37 -    
    6.38 -    // Prepare console packet
    6.39 -    console_packet = alloc_skb(sizeof(struct my_ethhdr) + hdr_size + len, 
    6.40 -			       GFP_KERNEL);
    6.41 +    u8 *skb_data;
    6.42 +
    6.43 +    // Prepare console packet - the grim + 20 in the alloc is for headroom.
    6.44 +    console_packet = dev_alloc_skb(sizeof(struct my_ethhdr) + hdr_size + len + 20);
    6.45 +    if (!console_packet) return 0;
    6.46 +//console_packet->security = 9; // hack to trace these packets.
    6.47 +    console_packet->dev = the_dev;
    6.48 +    skb_data = map_domain_mem((unsigned long)console_packet->head);
    6.49 +    skb_reserve(console_packet, 2); // ip header alignment.
    6.50 +//printk("Eth is: %d\n", console_packet->data - console_packet->head);
    6.51 +    ethh   = (struct my_ethhdr *) skb_data + (console_packet->data - console_packet->head);
    6.52      skb_reserve(console_packet, sizeof(struct my_ethhdr)); 
    6.53 -    ethh   = (struct my_ethhdr *)console_packet->head;
    6.54  
    6.55      skb_put(console_packet, hdr_size + len); 
    6.56 -    iph  = (struct my_iphdr *)console_packet->data; 
    6.57 -	udph = (struct my_udphdr *)(iph + 1); 
    6.58 -	memcpy((char *)(udph + 1), str, len); 
    6.59 +//printk("IP is: %d\n", console_packet->data - console_packet->head);
    6.60 +    iph  = (struct my_iphdr *)skb_data + (console_packet->data - console_packet->head); 
    6.61 +    udph = (struct my_udphdr *)(iph + 1); 
    6.62 +    memcpy((char *)(udph + 1), str, len); 
    6.63  
    6.64      // Build IP header
    6.65      iph->version = 4;
    6.66 @@ -446,6 +455,13 @@ int console_export(char *str, int len)
    6.67      memcpy(ethh->h_dest, "000000", 6);
    6.68      ethh->h_proto = htons(ETH_P_IP);
    6.69      console_packet->mac.ethernet= (struct ethhdr *)ethh;
    6.70 +
    6.71 +    // Make the packet appear to come off the external NIC so that the 
    6.72 +    // tables code doesn't get too confused.
    6.73 +    console_packet->src_vif = VIF_PHYSICAL_INTERFACE;
    6.74 +    console_packet->dst_vif = 0;
    6.75 +    
    6.76 +    unmap_domain_mem(skb_data);
    6.77      
    6.78      // Pass the packet to netif_rx
    6.79      (void)netif_rx(console_packet);
    6.80 @@ -481,23 +497,14 @@ long do_console_write(char *str, int cou
    6.81  	
    6.82          if ( !safe_str[i] ) break;
    6.83          putchar(prev = safe_str[i]);
    6.84 -	
    6.85 -        if ( prev == '\n' )
    6.86 -        {
    6.87 -	    exported_str[j]='\0';
    6.88 -	    console_export(exported_str, j-1);
    6.89 -	    j=0;
    6.90 -        }
    6.91 -	
    6.92      }
    6.93 -    if ( prev != '\n' ) 
    6.94 -    {
    6.95 -	putchar('\n');
    6.96 -        exported_str[j]='\0';
    6.97 -        console_export(exported_str, j-1);
    6.98 -    }
    6.99 +    
   6.100 +    if ( prev != '\n' ) putchar('\n');
   6.101      
   6.102      spin_unlock_irqrestore(&console_lock, flags);
   6.103      
   6.104 +    exported_str[j]='\0';
   6.105 +    console_export(exported_str, j-1);
   6.106 +    
   6.107      return(0);
   6.108  }
     7.1 --- a/xen-2.4.16/common/memory.c	Fri Feb 14 13:18:19 2003 +0000
     7.2 +++ b/xen-2.4.16/common/memory.c	Fri Feb 14 14:27:45 2003 +0000
     7.3 @@ -171,6 +171,7 @@
     7.4  #include <xeno/sched.h>
     7.5  #include <xeno/errno.h>
     7.6  #include <asm/page.h>
     7.7 +#include <asm/flushtlb.h>
     7.8  #include <asm/io.h>
     7.9  #include <asm/uaccess.h>
    7.10  #include <asm/domain_page.h>
    7.11 @@ -205,6 +206,7 @@ unsigned long frame_table_size;
    7.12  unsigned long max_page;
    7.13  
    7.14  struct list_head free_list;
    7.15 +spinlock_t free_list_lock = SPIN_LOCK_UNLOCKED;
    7.16  unsigned int free_pfns;
    7.17  
    7.18  static int tlb_flush[NR_CPUS];
    7.19 @@ -218,6 +220,7 @@ void __init init_frametable(unsigned lon
    7.20  {
    7.21      struct pfn_info *pf;
    7.22      unsigned long page_index;
    7.23 +    unsigned long flags;
    7.24  
    7.25      memset(tlb_flush, 0, sizeof(tlb_flush));
    7.26  
    7.27 @@ -230,6 +233,7 @@ void __init init_frametable(unsigned lon
    7.28      free_pfns = 0;
    7.29  
    7.30      /* Put all domain-allocatable memory on a free list. */
    7.31 +    spin_lock_irqsave(&free_list_lock, flags);
    7.32      INIT_LIST_HEAD(&free_list);
    7.33      for( page_index = (__pa(frame_table) + frame_table_size) >> PAGE_SHIFT; 
    7.34           page_index < nr_pages;
    7.35 @@ -239,6 +243,7 @@ void __init init_frametable(unsigned lon
    7.36          list_add_tail(&pf->list, &free_list);
    7.37          free_pfns++;
    7.38      }
    7.39 +    spin_unlock_irqrestore(&free_list_lock, flags);
    7.40  }
    7.41  
    7.42  
    7.43 @@ -697,7 +702,6 @@ static int do_extended_command(unsigned 
    7.44      return err;
    7.45  }
    7.46  
    7.47 -
    7.48  int do_process_page_updates(page_update_request_t *ureqs, int count)
    7.49  {
    7.50      page_update_request_t req;
    7.51 @@ -807,11 +811,10 @@ int do_process_page_updates(page_update_
    7.52      if ( tlb_flush[smp_processor_id()] )
    7.53      {
    7.54          tlb_flush[smp_processor_id()] = 0;
    7.55 -        __asm__ __volatile__ (
    7.56 -            "movl %%eax,%%cr3" : : 
    7.57 -            "a" (pagetable_val(current->mm.pagetable)));
    7.58 +        __write_cr3_counted(pagetable_val(current->mm.pagetable));
    7.59  
    7.60      }
    7.61  
    7.62      return(0);
    7.63  }
    7.64 +
     8.1 --- a/xen-2.4.16/common/network.c	Fri Feb 14 13:18:19 2003 +0000
     8.2 +++ b/xen-2.4.16/common/network.c	Fri Feb 14 14:27:45 2003 +0000
     8.3 @@ -49,6 +49,7 @@ net_vif_t *create_net_vif(int domain)
     8.4  {
     8.5      net_vif_t *new_vif;
     8.6      net_ring_t *new_ring;
     8.7 +    net_shadow_ring_t *shadow_ring;
     8.8      struct task_struct *dom_task;
     8.9      
    8.10      if ( !(dom_task = find_domain_by_id(domain)) ) 
    8.11 @@ -64,7 +65,27 @@ net_vif_t *create_net_vif(int domain)
    8.12      new_ring = dom_task->net_ring_base + dom_task->num_net_vifs;
    8.13      memset(new_ring, 0, sizeof(net_ring_t));
    8.14  
    8.15 +    // allocate the shadow ring.  
    8.16 +    // maybe these should be kmem_cache instead of kmalloc?
    8.17 +    
    8.18 +    shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL);
    8.19 +    if (shadow_ring == NULL) goto fail;
    8.20 +    
    8.21 +    shadow_ring->tx_ring = kmalloc(TX_RING_SIZE 
    8.22 +                    * sizeof(tx_shadow_entry_t), GFP_KERNEL);
    8.23 +    shadow_ring->rx_ring = kmalloc(RX_RING_SIZE
    8.24 +                    * sizeof(rx_shadow_entry_t), GFP_KERNEL);
    8.25 +    if ((shadow_ring->tx_ring == NULL) || (shadow_ring->rx_ring == NULL))
    8.26 +            goto fail;
    8.27 +
    8.28 +    shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0;
    8.29 +    
    8.30 +    // fill in the new vif struct.
    8.31 +    
    8.32      new_vif->net_ring = new_ring;
    8.33 +    new_vif->shadow_ring = shadow_ring;
    8.34 +    
    8.35 +                    
    8.36      skb_queue_head_init(&new_vif->skb_list);
    8.37      new_vif->domain = domain;
    8.38      
    8.39 @@ -77,6 +98,10 @@ net_vif_t *create_net_vif(int domain)
    8.40      dom_task->num_net_vifs++;
    8.41      
    8.42      return new_vif;
    8.43 +    
    8.44 +fail:
    8.45 +    printk("VIF allocation failed!\n");
    8.46 +    return NULL;
    8.47  }
    8.48  
    8.49  /* delete_net_vif - Delete the last vif in the given domain. 
    8.50 @@ -101,7 +126,10 @@ void destroy_net_vif(struct task_struct 
    8.51      write_lock(&sys_vif_lock);
    8.52      sys_vif_list[p->net_vif_list[i]->id] = NULL; // system vif list not gc'ed
    8.53      write_unlock(&sys_vif_lock);        
    8.54 -    
    8.55 +   
    8.56 +    kfree(p->net_vif_list[i]->shadow_ring->tx_ring);
    8.57 +    kfree(p->net_vif_list[i]->shadow_ring->rx_ring);
    8.58 +    kfree(p->net_vif_list[i]->shadow_ring);
    8.59      kmem_cache_free(net_vif_cache, p->net_vif_list[i]);
    8.60  }
    8.61  
    8.62 @@ -315,47 +343,56 @@ int net_find_rule(u8 nproto, u8 tproto, 
    8.63   * list.
    8.64   */
    8.65  
    8.66 -int net_get_target_vif(struct sk_buff *skb)
    8.67 +#define net_get_target_vif(skb) __net_get_target_vif(skb->data, skb->len, skb->src_vif)
    8.68 +
    8.69 +int __net_get_target_vif(u8 *data, unsigned int len, int src_vif)
    8.70  {
    8.71      int target = VIF_DROP;
    8.72 -    skb->h.raw = skb->nh.raw = skb->data;
    8.73 -    if ( skb->len < 2 ) goto drop;
    8.74 -    switch ( ntohs(skb->mac.ethernet->h_proto) )
    8.75 +    u8 *h_raw, *nh_raw;
    8.76 +    
    8.77 +    if ( len < 2 ) goto drop;
    8.78 +
    8.79 +    nh_raw = data + ETH_HLEN;
    8.80 +    switch ( ntohs(*(unsigned short *)(data + 12)) )
    8.81      {
    8.82      case ETH_P_ARP:
    8.83 -        if ( skb->len < 28 ) goto drop;
    8.84 -        target = net_find_rule((u8)ETH_P_ARP, 0, ntohl(*(u32 *)(skb->nh.raw + 14)),
    8.85 -                        ntohl(*(u32 *)(skb->nh.raw + 24)), 0, 0, 
    8.86 -                        skb->src_vif);
    8.87 +//printk("ARP!\n");
    8.88 +        if ( len < 28 ) goto drop;
    8.89 +        target = net_find_rule((u8)ETH_P_ARP, 0, ntohl(*(u32 *)(nh_raw + 14)),
    8.90 +                        ntohl(*(u32 *)(nh_raw + 24)), 0, 0, 
    8.91 +                        src_vif);
    8.92          break;
    8.93      case ETH_P_IP:
    8.94 -        if ( skb->len < 20 ) goto drop;
    8.95 -        skb->h.raw += ((*(unsigned char *)(skb->nh.raw)) & 0x0f) * 4;
    8.96 -        switch ( *(unsigned char *)(skb->nh.raw + 9) )
    8.97 +//printk("IP\n");
    8.98 +        if ( len < 20 ) goto drop;
    8.99 +        h_raw =  data + ((*(unsigned char *)(nh_raw)) & 0x0f) * 4;
   8.100 +        switch ( *(unsigned char *)(nh_raw + 9) )
   8.101          {
   8.102 -        case IPPROTO_TCP:
   8.103          case IPPROTO_UDP:
   8.104 -            target = net_find_rule((u8)ETH_P_IP,  *(u8 *)(skb->nh.raw + 9),
   8.105 -                    ntohl(*(u32 *)(skb->nh.raw + 12)),
   8.106 -                    ntohl(*(u32 *)(skb->nh.raw + 16)),
   8.107 -                    ntohs(*(u16 *)(skb->h.raw)),
   8.108 -                    ntohs(*(u16 *)(skb->h.raw + 2)), 
   8.109 -                    skb->src_vif);
   8.110 +//printk("UDP!\n");
   8.111 +        case IPPROTO_TCP:
   8.112 +            target = net_find_rule((u8)ETH_P_IP,  *(u8 *)(nh_raw + 9),
   8.113 +                    ntohl(*(u32 *)(nh_raw + 12)),
   8.114 +                    ntohl(*(u32 *)(nh_raw + 16)),
   8.115 +                    ntohs(*(u16 *)(h_raw)),
   8.116 +                    ntohs(*(u16 *)(h_raw + 2)), 
   8.117 +                    src_vif);
   8.118              break;
   8.119          default: // ip-based protocol where we don't have ports.
   8.120 -            target = net_find_rule((u8)ETH_P_IP,  *(u8 *)(skb->nh.raw + 9),
   8.121 -                    ntohl(*(u32 *)(skb->nh.raw + 12)),
   8.122 -                    ntohl(*(u32 *)(skb->nh.raw + 16)),
   8.123 +//printk("Other IP!\n");
   8.124 +            target = net_find_rule((u8)ETH_P_IP,  *(u8 *)(data + 9),
   8.125 +                    ntohl(*(u32 *)(nh_raw + 12)),
   8.126 +                    ntohl(*(u32 *)(nh_raw + 16)),
   8.127                      0,
   8.128                      0, 
   8.129 -                    skb->src_vif);
   8.130 +                    src_vif);
   8.131          }
   8.132          break;
   8.133      }
   8.134 -    skb->dst_vif=target;
   8.135      return target;
   8.136      
   8.137      drop:
   8.138 +//printk("Drop case!\n");
   8.139      return VIF_DROP;
   8.140  }
   8.141  
     9.1 --- a/xen-2.4.16/drivers/net/3c509.c	Fri Feb 14 13:18:19 2003 +0000
     9.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.3 @@ -1,1068 +0,0 @@
     9.4 -/* 3c509.c: A 3c509 EtherLink3 ethernet driver for linux. */
     9.5 -/*
     9.6 -	Written 1993-2000 by Donald Becker.
     9.7 -
     9.8 -	Copyright 1994-2000 by Donald Becker.
     9.9 -	Copyright 1993 United States Government as represented by the
    9.10 -	Director, National Security Agency.	 This software may be used and
    9.11 -	distributed according to the terms of the GNU General Public License,
    9.12 -	incorporated herein by reference.
    9.13 -
    9.14 -	This driver is for the 3Com EtherLinkIII series.
    9.15 -
    9.16 -	The author may be reached as becker@scyld.com, or C/O
    9.17 -	Scyld Computing Corporation
    9.18 -	410 Severn Ave., Suite 210
    9.19 -	Annapolis MD 21403
    9.20 -
    9.21 -	Known limitations:
    9.22 -	Because of the way 3c509 ISA detection works it's difficult to predict
    9.23 -	a priori which of several ISA-mode cards will be detected first.
    9.24 -
    9.25 -	This driver does not use predictive interrupt mode, resulting in higher
    9.26 -	packet latency but lower overhead.  If interrupts are disabled for an
    9.27 -	unusually long time it could also result in missed packets, but in
    9.28 -	practice this rarely happens.
    9.29 -
    9.30 -
    9.31 -	FIXES:
    9.32 -		Alan Cox:       Removed the 'Unexpected interrupt' bug.
    9.33 -		Michael Meskes:	Upgraded to Donald Becker's version 1.07.
    9.34 -		Alan Cox:	Increased the eeprom delay. Regardless of 
    9.35 -				what the docs say some people definitely
    9.36 -				get problems with lower (but in card spec)
    9.37 -				delays
    9.38 -		v1.10 4/21/97 Fixed module code so that multiple cards may be detected,
    9.39 -				other cleanups.  -djb
    9.40 -		Andrea Arcangeli:	Upgraded to Donald Becker's version 1.12.
    9.41 -		Rick Payne:	Fixed SMP race condition
    9.42 -		v1.13 9/8/97 Made 'max_interrupt_work' an insmod-settable variable -djb
    9.43 -		v1.14 10/15/97 Avoided waiting..discard message for fast machines -djb
    9.44 -		v1.15 1/31/98 Faster recovery for Tx errors. -djb
    9.45 -		v1.16 2/3/98 Different ID port handling to avoid sound cards. -djb
    9.46 -		v1.18 12Mar2001 Andrew Morton <andrewm@uow.edu.au>
    9.47 -			- Avoid bogus detect of 3c590's (Andrzej Krzysztofowicz)
    9.48 -			- Reviewed against 1.18 from scyld.com
    9.49 -*/
    9.50 -
    9.51 -/* A few values that may be tweaked. */
    9.52 -
    9.53 -/* Time in jiffies before concluding the transmitter is hung. */
    9.54 -#define TX_TIMEOUT  (400*HZ/1000)
    9.55 -/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
    9.56 -static int max_interrupt_work = 10;
    9.57 -
    9.58 -#include <linux/config.h>
    9.59 -#include <linux/module.h>
    9.60 -
    9.61 -//#include <linux/mca.h>
    9.62 -//#include <linux/isapnp.h>
    9.63 -#include <linux/sched.h>
    9.64 -//#include <linux/string.h>
    9.65 -#include <linux/lib.h>
    9.66 -#include <linux/interrupt.h>
    9.67 -#include <linux/errno.h>
    9.68 -//#include <linux/in.h>
    9.69 -#include <linux/slab.h>
    9.70 -#include <linux/ioport.h>
    9.71 -#include <linux/init.h>
    9.72 -#include <linux/netdevice.h>
    9.73 -#include <linux/etherdevice.h>
    9.74 -#include <linux/skbuff.h>
    9.75 -#include <linux/delay.h>	/* for udelay() */
    9.76 -#include <linux/spinlock.h>
    9.77 -
    9.78 -#include <asm/bitops.h>
    9.79 -#include <asm/io.h>
    9.80 -#include <asm/irq.h>
    9.81 -
    9.82 -static char versionA[] __initdata = "3c509.c:1.18 12Mar2001 becker@scyld.com\n";
    9.83 -static char versionB[] __initdata = "http://www.scyld.com/network/3c509.html\n";
    9.84 -
    9.85 -#ifdef EL3_DEBUG
    9.86 -static int el3_debug = EL3_DEBUG;
    9.87 -#else
    9.88 -static int el3_debug = 2;
    9.89 -#endif
    9.90 -
    9.91 -/* To minimize the size of the driver source I only define operating
    9.92 -   constants if they are used several times.  You'll need the manual
    9.93 -   anyway if you want to understand driver details. */
    9.94 -/* Offsets from base I/O address. */
    9.95 -#define EL3_DATA 0x00
    9.96 -#define EL3_CMD 0x0e
    9.97 -#define EL3_STATUS 0x0e
    9.98 -#define	 EEPROM_READ 0x80
    9.99 -
   9.100 -#define EL3_IO_EXTENT	16
   9.101 -
   9.102 -#define EL3WINDOW(win_num) outw(SelectWindow + (win_num), ioaddr + EL3_CMD)
   9.103 -
   9.104 -
   9.105 -/* The top five bits written to EL3_CMD are a command, the lower
   9.106 -   11 bits are the parameter, if applicable. */
   9.107 -enum c509cmd {
   9.108 -	TotalReset = 0<<11, SelectWindow = 1<<11, StartCoax = 2<<11,
   9.109 -	RxDisable = 3<<11, RxEnable = 4<<11, RxReset = 5<<11, RxDiscard = 8<<11,
   9.110 -	TxEnable = 9<<11, TxDisable = 10<<11, TxReset = 11<<11,
   9.111 -	FakeIntr = 12<<11, AckIntr = 13<<11, SetIntrEnb = 14<<11,
   9.112 -	SetStatusEnb = 15<<11, SetRxFilter = 16<<11, SetRxThreshold = 17<<11,
   9.113 -	SetTxThreshold = 18<<11, SetTxStart = 19<<11, StatsEnable = 21<<11,
   9.114 -	StatsDisable = 22<<11, StopCoax = 23<<11,};
   9.115 -
   9.116 -enum c509status {
   9.117 -	IntLatch = 0x0001, AdapterFailure = 0x0002, TxComplete = 0x0004,
   9.118 -	TxAvailable = 0x0008, RxComplete = 0x0010, RxEarly = 0x0020,
   9.119 -	IntReq = 0x0040, StatsFull = 0x0080, CmdBusy = 0x1000, };
   9.120 -
   9.121 -/* The SetRxFilter command accepts the following classes: */
   9.122 -enum RxFilter {
   9.123 -	RxStation = 1, RxMulticast = 2, RxBroadcast = 4, RxProm = 8 };
   9.124 -
   9.125 -/* Register window 1 offsets, the window used in normal operation. */
   9.126 -#define TX_FIFO		0x00
   9.127 -#define RX_FIFO		0x00
   9.128 -#define RX_STATUS 	0x08
   9.129 -#define TX_STATUS 	0x0B
   9.130 -#define TX_FREE		0x0C		/* Remaining free bytes in Tx buffer. */
   9.131 -
   9.132 -#define WN0_IRQ		0x08		/* Window 0: Set IRQ line in bits 12-15. */
   9.133 -#define WN4_MEDIA	0x0A		/* Window 4: Various transcvr/media bits. */
   9.134 -#define  MEDIA_TP	0x00C0		/* Enable link beat and jabber for 10baseT. */
   9.135 -
   9.136 -/*
   9.137 - * Must be a power of two (we use a binary and in the
   9.138 - * circular queue)
   9.139 - */
   9.140 -#define SKB_QUEUE_SIZE	64
   9.141 -
   9.142 -struct el3_private {
   9.143 -	struct net_device_stats stats;
   9.144 -	struct net_device *next_dev;
   9.145 -	spinlock_t lock;
   9.146 -	/* skb send-queue */
   9.147 -	int head, size;
   9.148 -	struct sk_buff *queue[SKB_QUEUE_SIZE];
   9.149 -	char mca_slot;
   9.150 -};
   9.151 -static int id_port __initdata = 0x110;	/* Start with 0x110 to avoid new sound cards.*/
   9.152 -static struct net_device *el3_root_dev;
   9.153 -
   9.154 -static ushort id_read_eeprom(int index);
   9.155 -static ushort read_eeprom(int ioaddr, int index);
   9.156 -static int el3_open(struct net_device *dev);
   9.157 -static int el3_start_xmit(struct sk_buff *skb, struct net_device *dev);
   9.158 -static void el3_interrupt(int irq, void *dev_id, struct pt_regs *regs);
   9.159 -static void update_stats(struct net_device *dev);
   9.160 -static struct net_device_stats *el3_get_stats(struct net_device *dev);
   9.161 -static int el3_rx(struct net_device *dev);
   9.162 -static int el3_close(struct net_device *dev);
   9.163 -static void set_multicast_list(struct net_device *dev);
   9.164 -static void el3_tx_timeout (struct net_device *dev);
   9.165 -
   9.166 -#ifdef CONFIG_MCA
   9.167 -struct el3_mca_adapters_struct {
   9.168 -	char* name;
   9.169 -	int id;
   9.170 -};
   9.171 -
   9.172 -static struct el3_mca_adapters_struct el3_mca_adapters[] __initdata = {
   9.173 -	{ "3Com 3c529 EtherLink III (10base2)", 0x627c },
   9.174 -	{ "3Com 3c529 EtherLink III (10baseT)", 0x627d },
   9.175 -	{ "3Com 3c529 EtherLink III (test mode)", 0x62db },
   9.176 -	{ "3Com 3c529 EtherLink III (TP or coax)", 0x62f6 },
   9.177 -	{ "3Com 3c529 EtherLink III (TP)", 0x62f7 },
   9.178 -	{ NULL, 0 },
   9.179 -};
   9.180 -#endif /* CONFIG_MCA */
   9.181 -
   9.182 -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE)
   9.183 -static struct isapnp_device_id el3_isapnp_adapters[] __initdata = {
   9.184 -	{	ISAPNP_ANY_ID, ISAPNP_ANY_ID,
   9.185 -		ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5090),
   9.186 -		(long) "3Com Etherlink III (TP)" },
   9.187 -	{	ISAPNP_ANY_ID, ISAPNP_ANY_ID,
   9.188 -		ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5091),
   9.189 -		(long) "3Com Etherlink III" },
   9.190 -	{	ISAPNP_ANY_ID, ISAPNP_ANY_ID,
   9.191 -		ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5094),
   9.192 -		(long) "3Com Etherlink III (combo)" },
   9.193 -	{	ISAPNP_ANY_ID, ISAPNP_ANY_ID,
   9.194 -		ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5095),
   9.195 -		(long) "3Com Etherlink III (TPO)" },
   9.196 -	{	ISAPNP_ANY_ID, ISAPNP_ANY_ID,
   9.197 -		ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5098),
   9.198 -		(long) "3Com Etherlink III (TPC)" },
   9.199 -	{	ISAPNP_ANY_ID, ISAPNP_ANY_ID,
   9.200 -		ISAPNP_VENDOR('P', 'N', 'P'), ISAPNP_FUNCTION(0x80f7),
   9.201 -		(long) "3Com Etherlink III compatible" },
   9.202 -	{	ISAPNP_ANY_ID, ISAPNP_ANY_ID,
   9.203 -		ISAPNP_VENDOR('P', 'N', 'P'), ISAPNP_FUNCTION(0x80f8),
   9.204 -		(long) "3Com Etherlink III compatible" },
   9.205 -	{ }	/* terminate list */
   9.206 -};
   9.207 -
   9.208 -MODULE_DEVICE_TABLE(isapnp, el3_isapnp_adapters);
   9.209 -MODULE_LICENSE("GPL");
   9.210 -
   9.211 -
   9.212 -static u16 el3_isapnp_phys_addr[8][3];
   9.213 -static int nopnp;
   9.214 -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */
   9.215 -
   9.216 -int __init el3_probe(struct net_device *dev)
   9.217 -{
   9.218 -	struct el3_private *lp;
   9.219 -	short lrs_state = 0xff, i;
   9.220 -	int ioaddr, irq, if_port;
   9.221 -	u16 phys_addr[3];
   9.222 -	static int current_tag;
   9.223 -	int mca_slot = -1;
   9.224 -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE)
   9.225 -	static int pnp_cards;
   9.226 -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */
   9.227 -
   9.228 -	if (dev) SET_MODULE_OWNER(dev);
   9.229 -
   9.230 -	/* First check all slots of the EISA bus.  The next slot address to
   9.231 -	   probe is kept in 'eisa_addr' to support multiple probe() calls. */
   9.232 -	if (EISA_bus) {
   9.233 -		static int eisa_addr = 0x1000;
   9.234 -		while (eisa_addr < 0x9000) {
   9.235 -			int device_id;
   9.236 -
   9.237 -			ioaddr = eisa_addr;
   9.238 -			eisa_addr += 0x1000;
   9.239 -
   9.240 -			/* Check the standard EISA ID register for an encoded '3Com'. */
   9.241 -			if (inw(ioaddr + 0xC80) != 0x6d50)
   9.242 -				continue;
   9.243 -
   9.244 -			/* Avoid conflict with 3c590, 3c592, 3c597, etc */
   9.245 -			device_id = (inb(ioaddr + 0xC82)<<8) + inb(ioaddr + 0xC83);
   9.246 -			if ((device_id & 0xFF00) == 0x5900) {
   9.247 -				continue;
   9.248 -			}
   9.249 -
   9.250 -			/* Change the register set to the configuration window 0. */
   9.251 -			outw(SelectWindow | 0, ioaddr + 0xC80 + EL3_CMD);
   9.252 -
   9.253 -			irq = inw(ioaddr + WN0_IRQ) >> 12;
   9.254 -			if_port = inw(ioaddr + 6)>>14;
   9.255 -			for (i = 0; i < 3; i++)
   9.256 -				phys_addr[i] = htons(read_eeprom(ioaddr, i));
   9.257 -
   9.258 -			/* Restore the "Product ID" to the EEPROM read register. */
   9.259 -			read_eeprom(ioaddr, 3);
   9.260 -
   9.261 -			/* Was the EISA code an add-on hack?  Nahhhhh... */
   9.262 -			goto found;
   9.263 -		}
   9.264 -	}
   9.265 -
   9.266 -#ifdef CONFIG_MCA
   9.267 -	/* Based on Erik Nygren's (nygren@mit.edu) 3c529 patch, heavily
   9.268 -	 * modified by Chris Beauregard (cpbeaure@csclub.uwaterloo.ca)
   9.269 -	 * to support standard MCA probing.
   9.270 -	 *
   9.271 -	 * redone for multi-card detection by ZP Gu (zpg@castle.net)
   9.272 -	 * now works as a module
   9.273 -	 */
   9.274 -
   9.275 -	if( MCA_bus ) {
   9.276 -		int slot, j;
   9.277 -		u_char pos4, pos5;
   9.278 -
   9.279 -		for( j = 0; el3_mca_adapters[j].name != NULL; j ++ ) {
   9.280 -			slot = 0;
   9.281 -			while( slot != MCA_NOTFOUND ) {
   9.282 -				slot = mca_find_unused_adapter(
   9.283 -					el3_mca_adapters[j].id, slot );
   9.284 -				if( slot == MCA_NOTFOUND ) break;
   9.285 -
   9.286 -				/* if we get this far, an adapter has been
   9.287 -				 * detected and is enabled
   9.288 -				 */
   9.289 -
   9.290 -				pos4 = mca_read_stored_pos( slot, 4 );
   9.291 -				pos5 = mca_read_stored_pos( slot, 5 );
   9.292 -
   9.293 -				ioaddr = ((short)((pos4&0xfc)|0x02)) << 8;
   9.294 -				irq = pos5 & 0x0f;
   9.295 -
   9.296 -				/* probing for a card at a particular IO/IRQ */
   9.297 -				if(dev && ((dev->irq >= 1 && dev->irq != irq) ||
   9.298 -			   	(dev->base_addr >= 1 && dev->base_addr != ioaddr))) {
   9.299 -					slot++;         /* probing next slot */
   9.300 -					continue;
   9.301 -				}
   9.302 -
   9.303 -				printk("3c509: found %s at slot %d\n",
   9.304 -					el3_mca_adapters[j].name, slot + 1 );
   9.305 -
   9.306 -				/* claim the slot */
   9.307 -				mca_set_adapter_name(slot, el3_mca_adapters[j].name);
   9.308 -				mca_set_adapter_procfn(slot, NULL, NULL);
   9.309 -				mca_mark_as_used(slot);
   9.310 -
   9.311 -				if_port = pos4 & 0x03;
   9.312 -				if (el3_debug > 2) {
   9.313 -					printk("3c529: irq %d  ioaddr 0x%x  ifport %d\n", irq, ioaddr, if_port);
   9.314 -				}
   9.315 -				EL3WINDOW(0);
   9.316 -				for (i = 0; i < 3; i++) {
   9.317 -					phys_addr[i] = htons(read_eeprom(ioaddr, i));
   9.318 -				}
   9.319 -				
   9.320 -				mca_slot = slot;
   9.321 -
   9.322 -				goto found;
   9.323 -			}
   9.324 -		}
   9.325 -		/* if we get here, we didn't find an MCA adapter */
   9.326 -		return -ENODEV;
   9.327 -	}
   9.328 -#endif /* CONFIG_MCA */
   9.329 -
   9.330 -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE)
   9.331 -	if (nopnp == 1)
   9.332 -		goto no_pnp;
   9.333 -
   9.334 -	for (i=0; el3_isapnp_adapters[i].vendor != 0; i++) {
   9.335 -		struct pci_dev *idev = NULL;
   9.336 -		int j;
   9.337 -		while ((idev = isapnp_find_dev(NULL,
   9.338 -						el3_isapnp_adapters[i].vendor,
   9.339 -						el3_isapnp_adapters[i].function,
   9.340 -						idev))) {
   9.341 -			idev->prepare(idev);
   9.342 -			/* Deactivation is needed if the driver was called
   9.343 -			   with "nopnp=1" before, does not harm if not. */
   9.344 -			idev->deactivate(idev);
   9.345 -			idev->activate(idev);
   9.346 -			if (!idev->resource[0].start || check_region(idev->resource[0].start, EL3_IO_EXTENT))
   9.347 -				continue;
   9.348 -			ioaddr = idev->resource[0].start;
   9.349 -			if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509 PnP"))
   9.350 -				return -EBUSY;
   9.351 -			irq = idev->irq_resource[0].start;
   9.352 -			if (el3_debug > 3)
   9.353 -				printk ("ISAPnP reports %s at i/o 0x%x, irq %d\n",
   9.354 -					(char*) el3_isapnp_adapters[i].driver_data, ioaddr, irq);
   9.355 -			EL3WINDOW(0);
   9.356 -			for (j = 0; j < 3; j++)
   9.357 -				el3_isapnp_phys_addr[pnp_cards][j] =
   9.358 -					phys_addr[j] =
   9.359 -						htons(read_eeprom(ioaddr, j));
   9.360 -			if_port = read_eeprom(ioaddr, 8) >> 14;
   9.361 -			pnp_cards++;
   9.362 -			goto found;
   9.363 -		}
   9.364 -	}
   9.365 -no_pnp:
   9.366 -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */
   9.367 -
   9.368 -	/* Select an open I/O location at 0x1*0 to do contention select. */
   9.369 -	for ( ; id_port < 0x200; id_port += 0x10) {
   9.370 -		if (check_region(id_port, 1))
   9.371 -			continue;
   9.372 -		outb(0x00, id_port);
   9.373 -		outb(0xff, id_port);
   9.374 -		if (inb(id_port) & 0x01)
   9.375 -			break;
   9.376 -	}
   9.377 -	if (id_port >= 0x200) {
   9.378 -		/* Rare -- do we really need a warning? */
   9.379 -		printk(" WARNING: No I/O port available for 3c509 activation.\n");
   9.380 -		return -ENODEV;
   9.381 -	}
   9.382 -	/* Next check for all ISA bus boards by sending the ID sequence to the
   9.383 -	   ID_PORT.  We find cards past the first by setting the 'current_tag'
   9.384 -	   on cards as they are found.  Cards with their tag set will not
   9.385 -	   respond to subsequent ID sequences. */
   9.386 -
   9.387 -	outb(0x00, id_port);
   9.388 -	outb(0x00, id_port);
   9.389 -	for(i = 0; i < 255; i++) {
   9.390 -		outb(lrs_state, id_port);
   9.391 -		lrs_state <<= 1;
   9.392 -		lrs_state = lrs_state & 0x100 ? lrs_state ^ 0xcf : lrs_state;
   9.393 -	}
   9.394 -
   9.395 -	/* For the first probe, clear all board's tag registers. */
   9.396 -	if (current_tag == 0)
   9.397 -		outb(0xd0, id_port);
   9.398 -	else				/* Otherwise kill off already-found boards. */
   9.399 -		outb(0xd8, id_port);
   9.400 -
   9.401 -	if (id_read_eeprom(7) != 0x6d50) {
   9.402 -		return -ENODEV;
   9.403 -	}
   9.404 -
   9.405 -	/* Read in EEPROM data, which does contention-select.
   9.406 -	   Only the lowest address board will stay "on-line".
   9.407 -	   3Com got the byte order backwards. */
   9.408 -	for (i = 0; i < 3; i++) {
   9.409 -		phys_addr[i] = htons(id_read_eeprom(i));
   9.410 -	}
   9.411 -
   9.412 -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE)
   9.413 -	if (nopnp == 0) {
   9.414 -		/* The ISA PnP 3c509 cards respond to the ID sequence.
   9.415 -		   This check is needed in order not to register them twice. */
   9.416 -		for (i = 0; i < pnp_cards; i++) {
   9.417 -			if (phys_addr[0] == el3_isapnp_phys_addr[i][0] &&
   9.418 -			    phys_addr[1] == el3_isapnp_phys_addr[i][1] &&
   9.419 -			    phys_addr[2] == el3_isapnp_phys_addr[i][2])
   9.420 -			{
   9.421 -				if (el3_debug > 3)
   9.422 -					printk("3c509 with address %02x %02x %02x %02x %02x %02x was found by ISAPnP\n",
   9.423 -						phys_addr[0] & 0xff, phys_addr[0] >> 8,
   9.424 -						phys_addr[1] & 0xff, phys_addr[1] >> 8,
   9.425 -						phys_addr[2] & 0xff, phys_addr[2] >> 8);
   9.426 -				/* Set the adaptor tag so that the next card can be found. */
   9.427 -				outb(0xd0 + ++current_tag, id_port);
   9.428 -				goto no_pnp;
   9.429 -			}
   9.430 -		}
   9.431 -	}
   9.432 -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */
   9.433 -
   9.434 -	{
   9.435 -		unsigned int iobase = id_read_eeprom(8);
   9.436 -		if_port = iobase >> 14;
   9.437 -		ioaddr = 0x200 + ((iobase & 0x1f) << 4);
   9.438 -	}
   9.439 -	irq = id_read_eeprom(9) >> 12;
   9.440 -
   9.441 -	if (dev) {					/* Set passed-in IRQ or I/O Addr. */
   9.442 -		if (dev->irq > 1  &&  dev->irq < 16)
   9.443 -			irq = dev->irq;
   9.444 -
   9.445 -		if (dev->base_addr) {
   9.446 -			if (dev->mem_end == 0x3c509 			/* Magic key */
   9.447 -				&& dev->base_addr >= 0x200  &&  dev->base_addr <= 0x3e0)
   9.448 -				ioaddr = dev->base_addr & 0x3f0;
   9.449 -			else if (dev->base_addr != ioaddr)
   9.450 -				return -ENODEV;
   9.451 -		}
   9.452 -	}
   9.453 -
   9.454 -	if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509"))
   9.455 -		return -EBUSY;
   9.456 -
   9.457 -	/* Set the adaptor tag so that the next card can be found. */
   9.458 -	outb(0xd0 + ++current_tag, id_port);
   9.459 -
   9.460 -	/* Activate the adaptor at the EEPROM location. */
   9.461 -	outb((ioaddr >> 4) | 0xe0, id_port);
   9.462 -
   9.463 -	EL3WINDOW(0);
   9.464 -	if (inw(ioaddr) != 0x6d50) {
   9.465 -		release_region(ioaddr, EL3_IO_EXTENT);
   9.466 -		return -ENODEV;
   9.467 -	}
   9.468 -
   9.469 -	/* Free the interrupt so that some other card can use it. */
   9.470 -	outw(0x0f00, ioaddr + WN0_IRQ);
   9.471 - found:
   9.472 -	if (dev == NULL) {
   9.473 -		dev = init_etherdev(dev, sizeof(struct el3_private));
   9.474 -		if (dev == NULL) {
   9.475 -			release_region(ioaddr, EL3_IO_EXTENT);
   9.476 -			return -ENOMEM;
   9.477 -		}
   9.478 -		SET_MODULE_OWNER(dev);
   9.479 -	}
   9.480 -	memcpy(dev->dev_addr, phys_addr, sizeof(phys_addr));
   9.481 -	dev->base_addr = ioaddr;
   9.482 -	dev->irq = irq;
   9.483 -	dev->if_port = (dev->mem_start & 0x1f) ? dev->mem_start & 3 : if_port;
   9.484 -
   9.485 -	{
   9.486 -		const char *if_names[] = {"10baseT", "AUI", "undefined", "BNC"};
   9.487 -		printk("%s: 3c5x9 at %#3.3lx, %s port, address ",
   9.488 -			   dev->name, dev->base_addr, if_names[dev->if_port]);
   9.489 -	}
   9.490 -
   9.491 -	/* Read in the station address. */
   9.492 -	for (i = 0; i < 6; i++)
   9.493 -		printk(" %2.2x", dev->dev_addr[i]);
   9.494 -	printk(", IRQ %d.\n", dev->irq);
   9.495 -
   9.496 -	/* Make up a EL3-specific-data structure. */
   9.497 -	if (dev->priv == NULL)
   9.498 -		dev->priv = kmalloc(sizeof(struct el3_private), GFP_KERNEL);
   9.499 -	if (dev->priv == NULL)
   9.500 -		return -ENOMEM;
   9.501 -	memset(dev->priv, 0, sizeof(struct el3_private));
   9.502 -	
   9.503 -	lp = dev->priv;
   9.504 -	lp->mca_slot = mca_slot;
   9.505 -	lp->next_dev = el3_root_dev;
   9.506 -	spin_lock_init(&lp->lock);
   9.507 -	el3_root_dev = dev;
   9.508 -
   9.509 -	if (el3_debug > 0)
   9.510 -		printk(KERN_INFO "%s" KERN_INFO "%s", versionA, versionB);
   9.511 -
   9.512 -	/* The EL3-specific entries in the device structure. */
   9.513 -	dev->open = &el3_open;
   9.514 -	dev->hard_start_xmit = &el3_start_xmit;
   9.515 -	dev->stop = &el3_close;
   9.516 -	dev->get_stats = &el3_get_stats;
   9.517 -	dev->set_multicast_list = &set_multicast_list;
   9.518 -	dev->tx_timeout = el3_tx_timeout;
   9.519 -	dev->watchdog_timeo = TX_TIMEOUT;
   9.520 -
   9.521 -	/* Fill in the generic fields of the device structure. */
   9.522 -	ether_setup(dev);
   9.523 -	return 0;
   9.524 -}
   9.525 -
   9.526 -/* Read a word from the EEPROM using the regular EEPROM access register.
   9.527 -   Assume that we are in register window zero.
   9.528 - */
   9.529 -static ushort __init read_eeprom(int ioaddr, int index)
   9.530 -{
   9.531 -	outw(EEPROM_READ + index, ioaddr + 10);
   9.532 -	/* Pause for at least 162 us. for the read to take place. */
   9.533 -	udelay (500);
   9.534 -	return inw(ioaddr + 12);
   9.535 -}
   9.536 -
   9.537 -/* Read a word from the EEPROM when in the ISA ID probe state. */
   9.538 -static ushort __init id_read_eeprom(int index)
   9.539 -{
   9.540 -	int bit, word = 0;
   9.541 -
   9.542 -	/* Issue read command, and pause for at least 162 us. for it to complete.
   9.543 -	   Assume extra-fast 16Mhz bus. */
   9.544 -	outb(EEPROM_READ + index, id_port);
   9.545 -
   9.546 -	/* Pause for at least 162 us. for the read to take place. */
   9.547 -	udelay (500);
   9.548 -	
   9.549 -	for (bit = 15; bit >= 0; bit--)
   9.550 -		word = (word << 1) + (inb(id_port) & 0x01);
   9.551 -
   9.552 -	if (el3_debug > 3)
   9.553 -		printk("  3c509 EEPROM word %d %#4.4x.\n", index, word);
   9.554 -
   9.555 -	return word;
   9.556 -}
   9.557 -
   9.558 -
   9.559 -static int
   9.560 -el3_open(struct net_device *dev)
   9.561 -{
   9.562 -	int ioaddr = dev->base_addr;
   9.563 -	int i;
   9.564 -
   9.565 -	outw(TxReset, ioaddr + EL3_CMD);
   9.566 -	outw(RxReset, ioaddr + EL3_CMD);
   9.567 -	outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD);
   9.568 -
   9.569 -	i = request_irq(dev->irq, &el3_interrupt, 0, dev->name, dev);
   9.570 -	if (i) return i;
   9.571 -
   9.572 -	EL3WINDOW(0);
   9.573 -	if (el3_debug > 3)
   9.574 -		printk("%s: Opening, IRQ %d	 status@%x %4.4x.\n", dev->name,
   9.575 -			   dev->irq, ioaddr + EL3_STATUS, inw(ioaddr + EL3_STATUS));
   9.576 -
   9.577 -	/* Activate board: this is probably unnecessary. */
   9.578 -	outw(0x0001, ioaddr + 4);
   9.579 -
   9.580 -	/* Set the IRQ line. */
   9.581 -	outw((dev->irq << 12) | 0x0f00, ioaddr + WN0_IRQ);
   9.582 -
   9.583 -	/* Set the station address in window 2 each time opened. */
   9.584 -	EL3WINDOW(2);
   9.585 -
   9.586 -	for (i = 0; i < 6; i++)
   9.587 -		outb(dev->dev_addr[i], ioaddr + i);
   9.588 -
   9.589 -	if (dev->if_port == 3)
   9.590 -		/* Start the thinnet transceiver. We should really wait 50ms...*/
   9.591 -		outw(StartCoax, ioaddr + EL3_CMD);
   9.592 -	else if (dev->if_port == 0) {
   9.593 -		/* 10baseT interface, enabled link beat and jabber check. */
   9.594 -		EL3WINDOW(4);
   9.595 -		outw(inw(ioaddr + WN4_MEDIA) | MEDIA_TP, ioaddr + WN4_MEDIA);
   9.596 -	}
   9.597 -
   9.598 -	/* Switch to the stats window, and clear all stats by reading. */
   9.599 -	outw(StatsDisable, ioaddr + EL3_CMD);
   9.600 -	EL3WINDOW(6);
   9.601 -	for (i = 0; i < 9; i++)
   9.602 -		inb(ioaddr + i);
   9.603 -	inw(ioaddr + 10);
   9.604 -	inw(ioaddr + 12);
   9.605 -
   9.606 -	/* Switch to register set 1 for normal use. */
   9.607 -	EL3WINDOW(1);
   9.608 -
   9.609 -	/* Accept b-case and phys addr only. */
   9.610 -	outw(SetRxFilter | RxStation | RxBroadcast, ioaddr + EL3_CMD);
   9.611 -	outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */
   9.612 -
   9.613 -	netif_start_queue(dev);
   9.614 -
   9.615 -	outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */
   9.616 -	outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */
   9.617 -	/* Allow status bits to be seen. */
   9.618 -	outw(SetStatusEnb | 0xff, ioaddr + EL3_CMD);
   9.619 -	/* Ack all pending events, and set active indicator mask. */
   9.620 -	outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq,
   9.621 -		 ioaddr + EL3_CMD);
   9.622 -	outw(SetIntrEnb | IntLatch|TxAvailable|TxComplete|RxComplete|StatsFull,
   9.623 -		 ioaddr + EL3_CMD);
   9.624 -
   9.625 -	if (el3_debug > 3)
   9.626 -		printk("%s: Opened 3c509  IRQ %d  status %4.4x.\n",
   9.627 -			   dev->name, dev->irq, inw(ioaddr + EL3_STATUS));
   9.628 -
   9.629 -	return 0;
   9.630 -}
   9.631 -
   9.632 -static void
   9.633 -el3_tx_timeout (struct net_device *dev)
   9.634 -{
   9.635 -	struct el3_private *lp = (struct el3_private *)dev->priv;
   9.636 -	int ioaddr = dev->base_addr;
   9.637 -
   9.638 -	/* Transmitter timeout, serious problems. */
   9.639 -	printk("%s: transmit timed out, Tx_status %2.2x status %4.4x "
   9.640 -		   "Tx FIFO room %d.\n",
   9.641 -		   dev->name, inb(ioaddr + TX_STATUS), inw(ioaddr + EL3_STATUS),
   9.642 -		   inw(ioaddr + TX_FREE));
   9.643 -	lp->stats.tx_errors++;
   9.644 -	dev->trans_start = jiffies;
   9.645 -	/* Issue TX_RESET and TX_START commands. */
   9.646 -	outw(TxReset, ioaddr + EL3_CMD);
   9.647 -	outw(TxEnable, ioaddr + EL3_CMD);
   9.648 -	netif_wake_queue(dev);
   9.649 -}
   9.650 -
   9.651 -
   9.652 -static int
   9.653 -el3_start_xmit(struct sk_buff *skb, struct net_device *dev)
   9.654 -{
   9.655 -	struct el3_private *lp = (struct el3_private *)dev->priv;
   9.656 -	int ioaddr = dev->base_addr;
   9.657 -	unsigned long flags;
   9.658 -
   9.659 -	netif_stop_queue (dev);
   9.660 -
   9.661 -	lp->stats.tx_bytes += skb->len;
   9.662 -	
   9.663 -	if (el3_debug > 4) {
   9.664 -		printk("%s: el3_start_xmit(length = %u) called, status %4.4x.\n",
   9.665 -			   dev->name, skb->len, inw(ioaddr + EL3_STATUS));
   9.666 -	}
   9.667 -#if 0
   9.668 -#ifndef final_version
   9.669 -	{	/* Error-checking code, delete someday. */
   9.670 -		ushort status = inw(ioaddr + EL3_STATUS);
   9.671 -		if (status & 0x0001 		/* IRQ line active, missed one. */
   9.672 -			&& inw(ioaddr + EL3_STATUS) & 1) { 			/* Make sure. */
   9.673 -			printk("%s: Missed interrupt, status then %04x now %04x"
   9.674 -				   "  Tx %2.2x Rx %4.4x.\n", dev->name, status,
   9.675 -				   inw(ioaddr + EL3_STATUS), inb(ioaddr + TX_STATUS),
   9.676 -				   inw(ioaddr + RX_STATUS));
   9.677 -			/* Fake interrupt trigger by masking, acknowledge interrupts. */
   9.678 -			outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD);
   9.679 -			outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq,
   9.680 -				 ioaddr + EL3_CMD);
   9.681 -			outw(SetStatusEnb | 0xff, ioaddr + EL3_CMD);
   9.682 -		}
   9.683 -	}
   9.684 -#endif
   9.685 -#endif
   9.686 -	/*
   9.687 -	 *	We lock the driver against other processors. Note
   9.688 -	 *	we don't need to lock versus the IRQ as we suspended
   9.689 -	 *	that. This means that we lose the ability to take
   9.690 -	 *	an RX during a TX upload. That sucks a bit with SMP
   9.691 -	 *	on an original 3c509 (2K buffer)
   9.692 -	 *
   9.693 -	 *	Using disable_irq stops us crapping on other
   9.694 -	 *	time sensitive devices.
   9.695 -	 */
   9.696 -
   9.697 -    	spin_lock_irqsave(&lp->lock, flags);
   9.698 -	    
   9.699 -	/* Put out the doubleword header... */
   9.700 -	outw(skb->len, ioaddr + TX_FIFO);
   9.701 -	outw(0x00, ioaddr + TX_FIFO);
   9.702 -	/* ... and the packet rounded to a doubleword. */
   9.703 -#ifdef  __powerpc__
   9.704 -	outsl_unswapped(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2);
   9.705 -#else
   9.706 -	outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2);
   9.707 -#endif
   9.708 -
   9.709 -	dev->trans_start = jiffies;
   9.710 -	if (inw(ioaddr + TX_FREE) > 1536)
   9.711 -		netif_start_queue(dev);
   9.712 -	else
   9.713 -		/* Interrupt us when the FIFO has room for max-sized packet. */
   9.714 -		outw(SetTxThreshold + 1536, ioaddr + EL3_CMD);
   9.715 -
   9.716 -	spin_unlock_irqrestore(&lp->lock, flags);
   9.717 -
   9.718 -	dev_kfree_skb (skb);
   9.719 -
   9.720 -	/* Clear the Tx status stack. */
   9.721 -	{
   9.722 -		short tx_status;
   9.723 -		int i = 4;
   9.724 -
   9.725 -		while (--i > 0	&&	(tx_status = inb(ioaddr + TX_STATUS)) > 0) {
   9.726 -			if (tx_status & 0x38) lp->stats.tx_aborted_errors++;
   9.727 -			if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD);
   9.728 -			if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD);
   9.729 -			outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */
   9.730 -		}
   9.731 -	}
   9.732 -	return 0;
   9.733 -}
   9.734 -
   9.735 -/* The EL3 interrupt handler. */
   9.736 -static void
   9.737 -el3_interrupt(int irq, void *dev_id, struct pt_regs *regs)
   9.738 -{
   9.739 -	struct net_device *dev = (struct net_device *)dev_id;
   9.740 -	struct el3_private *lp;
   9.741 -	int ioaddr, status;
   9.742 -	int i = max_interrupt_work;
   9.743 -
   9.744 -	if (dev == NULL) {
   9.745 -		printk ("el3_interrupt(): irq %d for unknown device.\n", irq);
   9.746 -		return;
   9.747 -	}
   9.748 -
   9.749 -	lp = (struct el3_private *)dev->priv;
   9.750 -	spin_lock(&lp->lock);
   9.751 -
   9.752 -	ioaddr = dev->base_addr;
   9.753 -
   9.754 -	if (el3_debug > 4) {
   9.755 -		status = inw(ioaddr + EL3_STATUS);
   9.756 -		printk("%s: interrupt, status %4.4x.\n", dev->name, status);
   9.757 -	}
   9.758 -
   9.759 -	while ((status = inw(ioaddr + EL3_STATUS)) &
   9.760 -		   (IntLatch | RxComplete | StatsFull)) {
   9.761 -
   9.762 -		if (status & RxComplete)
   9.763 -			el3_rx(dev);
   9.764 -
   9.765 -		if (status & TxAvailable) {
   9.766 -			if (el3_debug > 5)
   9.767 -				printk("	TX room bit was handled.\n");
   9.768 -			/* There's room in the FIFO for a full-sized packet. */
   9.769 -			outw(AckIntr | TxAvailable, ioaddr + EL3_CMD);
   9.770 -			netif_wake_queue (dev);
   9.771 -		}
   9.772 -		if (status & (AdapterFailure | RxEarly | StatsFull | TxComplete)) {
   9.773 -			/* Handle all uncommon interrupts. */
   9.774 -			if (status & StatsFull)				/* Empty statistics. */
   9.775 -				update_stats(dev);
   9.776 -			if (status & RxEarly) {				/* Rx early is unused. */
   9.777 -				el3_rx(dev);
   9.778 -				outw(AckIntr | RxEarly, ioaddr + EL3_CMD);
   9.779 -			}
   9.780 -			if (status & TxComplete) {			/* Really Tx error. */
   9.781 -				struct el3_private *lp = (struct el3_private *)dev->priv;
   9.782 -				short tx_status;
   9.783 -				int i = 4;
   9.784 -
   9.785 -				while (--i>0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) {
   9.786 -					if (tx_status & 0x38) lp->stats.tx_aborted_errors++;
   9.787 -					if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD);
   9.788 -					if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD);
   9.789 -					outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */
   9.790 -				}
   9.791 -			}
   9.792 -			if (status & AdapterFailure) {
   9.793 -				/* Adapter failure requires Rx reset and reinit. */
   9.794 -				outw(RxReset, ioaddr + EL3_CMD);
   9.795 -				/* Set the Rx filter to the current state. */
   9.796 -				outw(SetRxFilter | RxStation | RxBroadcast
   9.797 -					 | (dev->flags & IFF_ALLMULTI ? RxMulticast : 0)
   9.798 -					 | (dev->flags & IFF_PROMISC ? RxProm : 0),
   9.799 -					 ioaddr + EL3_CMD);
   9.800 -				outw(RxEnable, ioaddr + EL3_CMD); /* Re-enable the receiver. */
   9.801 -				outw(AckIntr | AdapterFailure, ioaddr + EL3_CMD);
   9.802 -			}
   9.803 -		}
   9.804 -
   9.805 -		if (--i < 0) {
   9.806 -			printk("%s: Infinite loop in interrupt, status %4.4x.\n",
   9.807 -				   dev->name, status);
   9.808 -			/* Clear all interrupts. */
   9.809 -			outw(AckIntr | 0xFF, ioaddr + EL3_CMD);
   9.810 -			break;
   9.811 -		}
   9.812 -		/* Acknowledge the IRQ. */
   9.813 -		outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); /* Ack IRQ */
   9.814 -	}
   9.815 -
   9.816 -	if (el3_debug > 4) {
   9.817 -		printk("%s: exiting interrupt, status %4.4x.\n", dev->name,
   9.818 -			   inw(ioaddr + EL3_STATUS));
   9.819 -	}
   9.820 -	spin_unlock(&lp->lock);
   9.821 -	return;
   9.822 -}
   9.823 -
   9.824 -
   9.825 -static struct net_device_stats *
   9.826 -el3_get_stats(struct net_device *dev)
   9.827 -{
   9.828 -	struct el3_private *lp = (struct el3_private *)dev->priv;
   9.829 -	unsigned long flags;
   9.830 -
   9.831 -	/*
   9.832 -	 *	This is fast enough not to bother with disable IRQ
   9.833 -	 *	stuff.
   9.834 -	 */
   9.835 -	 
   9.836 -	spin_lock_irqsave(&lp->lock, flags);
   9.837 -	update_stats(dev);
   9.838 -	spin_unlock_irqrestore(&lp->lock, flags);
   9.839 -	return &lp->stats;
   9.840 -}
   9.841 -
   9.842 -/*  Update statistics.  We change to register window 6, so this should be run
   9.843 -	single-threaded if the device is active. This is expected to be a rare
   9.844 -	operation, and it's simpler for the rest of the driver to assume that
   9.845 -	window 1 is always valid rather than use a special window-state variable.
   9.846 -	*/
   9.847 -static void update_stats(struct net_device *dev)
   9.848 -{
   9.849 -	struct el3_private *lp = (struct el3_private *)dev->priv;
   9.850 -	int ioaddr = dev->base_addr;
   9.851 -
   9.852 -	if (el3_debug > 5)
   9.853 -		printk("   Updating the statistics.\n");
   9.854 -	/* Turn off statistics updates while reading. */
   9.855 -	outw(StatsDisable, ioaddr + EL3_CMD);
   9.856 -	/* Switch to the stats window, and read everything. */
   9.857 -	EL3WINDOW(6);
   9.858 -	lp->stats.tx_carrier_errors 	+= inb(ioaddr + 0);
   9.859 -	lp->stats.tx_heartbeat_errors	+= inb(ioaddr + 1);
   9.860 -	/* Multiple collisions. */	   inb(ioaddr + 2);
   9.861 -	lp->stats.collisions		+= inb(ioaddr + 3);
   9.862 -	lp->stats.tx_window_errors	+= inb(ioaddr + 4);
   9.863 -	lp->stats.rx_fifo_errors	+= inb(ioaddr + 5);
   9.864 -	lp->stats.tx_packets		+= inb(ioaddr + 6);
   9.865 -	/* Rx packets	*/		   inb(ioaddr + 7);
   9.866 -	/* Tx deferrals */		   inb(ioaddr + 8);
   9.867 -	inw(ioaddr + 10);	/* Total Rx and Tx octets. */
   9.868 -	inw(ioaddr + 12);
   9.869 -
   9.870 -	/* Back to window 1, and turn statistics back on. */
   9.871 -	EL3WINDOW(1);
   9.872 -	outw(StatsEnable, ioaddr + EL3_CMD);
   9.873 -	return;
   9.874 -}
   9.875 -
   9.876 -static int
   9.877 -el3_rx(struct net_device *dev)
   9.878 -{
   9.879 -	struct el3_private *lp = (struct el3_private *)dev->priv;
   9.880 -	int ioaddr = dev->base_addr;
   9.881 -	short rx_status;
   9.882 -
   9.883 -	if (el3_debug > 5)
   9.884 -		printk("   In rx_packet(), status %4.4x, rx_status %4.4x.\n",
   9.885 -			   inw(ioaddr+EL3_STATUS), inw(ioaddr+RX_STATUS));
   9.886 -	while ((rx_status = inw(ioaddr + RX_STATUS)) > 0) {
   9.887 -		if (rx_status & 0x4000) { /* Error, update stats. */
   9.888 -			short error = rx_status & 0x3800;
   9.889 -
   9.890 -			outw(RxDiscard, ioaddr + EL3_CMD);
   9.891 -			lp->stats.rx_errors++;
   9.892 -			switch (error) {
   9.893 -			case 0x0000:		lp->stats.rx_over_errors++; break;
   9.894 -			case 0x0800:		lp->stats.rx_length_errors++; break;
   9.895 -			case 0x1000:		lp->stats.rx_frame_errors++; break;
   9.896 -			case 0x1800:		lp->stats.rx_length_errors++; break;
   9.897 -			case 0x2000:		lp->stats.rx_frame_errors++; break;
   9.898 -			case 0x2800:		lp->stats.rx_crc_errors++; break;
   9.899 -			}
   9.900 -		} else {
   9.901 -			short pkt_len = rx_status & 0x7ff;
   9.902 -			struct sk_buff *skb;
   9.903 -
   9.904 -			skb = dev_alloc_skb(pkt_len+5);
   9.905 -			lp->stats.rx_bytes += pkt_len;
   9.906 -			if (el3_debug > 4)
   9.907 -				printk("Receiving packet size %d status %4.4x.\n",
   9.908 -					   pkt_len, rx_status);
   9.909 -			if (skb != NULL) {
   9.910 -				skb->dev = dev;
   9.911 -				skb_reserve(skb, 2);     /* Align IP on 16 byte */
   9.912 -
   9.913 -				/* 'skb->data' points to the start of sk_buff data area. */
   9.914 -#ifdef  __powerpc__
   9.915 -				insl_unswapped(ioaddr+RX_FIFO, skb_put(skb,pkt_len),
   9.916 -							   (pkt_len + 3) >> 2);
   9.917 -#else
   9.918 -				insl(ioaddr + RX_FIFO, skb_put(skb,pkt_len),
   9.919 -					 (pkt_len + 3) >> 2);
   9.920 -#endif
   9.921 -
   9.922 -				outw(RxDiscard, ioaddr + EL3_CMD); /* Pop top Rx packet. */
   9.923 -				skb->protocol = eth_type_trans(skb,dev);
   9.924 -				netif_rx(skb);
   9.925 -				dev->last_rx = jiffies;
   9.926 -				lp->stats.rx_packets++;
   9.927 -				continue;
   9.928 -			}
   9.929 -			outw(RxDiscard, ioaddr + EL3_CMD);
   9.930 -			lp->stats.rx_dropped++;
   9.931 -			if (el3_debug)
   9.932 -				printk("%s: Couldn't allocate a sk_buff of size %d.\n",
   9.933 -					   dev->name, pkt_len);
   9.934 -		}
   9.935 -		inw(ioaddr + EL3_STATUS); 				/* Delay. */
   9.936 -		while (inw(ioaddr + EL3_STATUS) & 0x1000)
   9.937 -			printk(KERN_DEBUG "	Waiting for 3c509 to discard packet, status %x.\n",
   9.938 -				   inw(ioaddr + EL3_STATUS) );
   9.939 -	}
   9.940 -
   9.941 -	return 0;
   9.942 -}
   9.943 -
   9.944 -/*
   9.945 - *     Set or clear the multicast filter for this adaptor.
   9.946 - */
   9.947 -static void
   9.948 -set_multicast_list(struct net_device *dev)
   9.949 -{
   9.950 -	unsigned long flags;
   9.951 -	struct el3_private *lp = (struct el3_private *)dev->priv;
   9.952 -	int ioaddr = dev->base_addr;
   9.953 -
   9.954 -	if (el3_debug > 1) {
   9.955 -		static int old;
   9.956 -		if (old != dev->mc_count) {
   9.957 -			old = dev->mc_count;
   9.958 -			printk("%s: Setting Rx mode to %d addresses.\n", dev->name, dev->mc_count);
   9.959 -		}
   9.960 -	}
   9.961 -	spin_lock_irqsave(&lp->lock, flags);
   9.962 -	if (dev->flags&IFF_PROMISC) {
   9.963 -		outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast | RxProm,
   9.964 -			 ioaddr + EL3_CMD);
   9.965 -	}
   9.966 -	else if (dev->mc_count || (dev->flags&IFF_ALLMULTI)) {
   9.967 -		outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast, ioaddr + EL3_CMD);
   9.968 -	}
   9.969 -	else
   9.970 -                outw(SetRxFilter | RxStation | RxBroadcast, ioaddr + EL3_CMD);
   9.971 -	spin_unlock_irqrestore(&lp->lock, flags);
   9.972 -}
   9.973 -
   9.974 -static int
   9.975 -el3_close(struct net_device *dev)
   9.976 -{
   9.977 -	int ioaddr = dev->base_addr;
   9.978 -
   9.979 -	if (el3_debug > 2)
   9.980 -		printk("%s: Shutting down ethercard.\n", dev->name);
   9.981 -
   9.982 -	netif_stop_queue(dev);
   9.983 -
   9.984 -	/* Turn off statistics ASAP.  We update lp->stats below. */
   9.985 -	outw(StatsDisable, ioaddr + EL3_CMD);
   9.986 -
   9.987 -	/* Disable the receiver and transmitter. */
   9.988 -	outw(RxDisable, ioaddr + EL3_CMD);
   9.989 -	outw(TxDisable, ioaddr + EL3_CMD);
   9.990 -
   9.991 -	if (dev->if_port == 3)
   9.992 -		/* Turn off thinnet power.  Green! */
   9.993 -		outw(StopCoax, ioaddr + EL3_CMD);
   9.994 -	else if (dev->if_port == 0) {
   9.995 -		/* Disable link beat and jabber, if_port may change ere next open(). */
   9.996 -		EL3WINDOW(4);
   9.997 -		outw(inw(ioaddr + WN4_MEDIA) & ~MEDIA_TP, ioaddr + WN4_MEDIA);
   9.998 -	}
   9.999 -
  9.1000 -	free_irq(dev->irq, dev);
  9.1001 -	/* Switching back to window 0 disables the IRQ. */
  9.1002 -	EL3WINDOW(0);
  9.1003 -	/* But we explicitly zero the IRQ line select anyway. */
  9.1004 -	outw(0x0f00, ioaddr + WN0_IRQ);
  9.1005 -
  9.1006 -	update_stats(dev);
  9.1007 -	return 0;
  9.1008 -}
  9.1009 -
  9.1010 -/*#ifdef MODULE*/
  9.1011 -/* Parameters that may be passed into the module. */
  9.1012 -static int debug = -1;
  9.1013 -static int irq[] = {-1, -1, -1, -1, -1, -1, -1, -1};
  9.1014 -static int xcvr[] = {-1, -1, -1, -1, -1, -1, -1, -1};
  9.1015 -
  9.1016 -MODULE_PARM(debug,"i");
  9.1017 -MODULE_PARM(irq,"1-8i");
  9.1018 -MODULE_PARM(xcvr,"1-8i");
  9.1019 -MODULE_PARM(max_interrupt_work, "i");
  9.1020 -MODULE_PARM_DESC(debug, "EtherLink III debug level (0-6)");
  9.1021 -MODULE_PARM_DESC(irq, "EtherLink III IRQ number(s) (assigned)");
  9.1022 -MODULE_PARM_DESC(xcvr,"EtherLink III tranceiver(s) (0=internal, 1=external)");
  9.1023 -MODULE_PARM_DESC(max_interrupt_work, "EtherLink III maximum events handled per interrupt");
  9.1024 -#ifdef CONFIG_ISAPNP
  9.1025 -MODULE_PARM(nopnp, "i");
  9.1026 -MODULE_PARM_DESC(nopnp, "EtherLink III disable ISA PnP support (0-1)");
  9.1027 -#endif	/* CONFIG_ISAPNP */
  9.1028 -
  9.1029 -int
  9.1030 -init_module(void)
  9.1031 -{
  9.1032 -	int el3_cards = 0;
  9.1033 -
  9.1034 -	if (debug >= 0)
  9.1035 -		el3_debug = debug;
  9.1036 -
  9.1037 -	el3_root_dev = NULL;
  9.1038 -	while (el3_probe(0) == 0) {
  9.1039 -		if (irq[el3_cards] > 1)
  9.1040 -			el3_root_dev->irq = irq[el3_cards];
  9.1041 -		if (xcvr[el3_cards] >= 0)
  9.1042 -			el3_root_dev->if_port = xcvr[el3_cards];
  9.1043 -		el3_cards++;
  9.1044 -	}
  9.1045 -
  9.1046 -	return el3_cards ? 0 : -ENODEV;
  9.1047 -}
  9.1048 -
  9.1049 -void
  9.1050 -cleanup_module(void)
  9.1051 -{
  9.1052 -	struct net_device *next_dev;
  9.1053 -
  9.1054 -	/* No need to check MOD_IN_USE, as sys_delete_module() checks. */
  9.1055 -	while (el3_root_dev) {
  9.1056 -		struct el3_private *lp = (struct el3_private *)el3_root_dev->priv;
  9.1057 -#ifdef CONFIG_MCA		
  9.1058 -		if(lp->mca_slot!=-1)
  9.1059 -			mca_mark_as_unused(lp->mca_slot);
  9.1060 -#endif			
  9.1061 -		next_dev = lp->next_dev;
  9.1062 -		unregister_netdev(el3_root_dev);
  9.1063 -		release_region(el3_root_dev->base_addr, EL3_IO_EXTENT);
  9.1064 -		kfree(el3_root_dev);
  9.1065 -		el3_root_dev = next_dev;
  9.1066 -	}
  9.1067 -}
  9.1068 -/*#endif*/
  9.1069 -
  9.1070 -module_init(init_module);
  9.1071 -module_exit(cleanup_module);
    10.1 --- a/xen-2.4.16/drivers/net/3c59x.c	Fri Feb 14 13:18:19 2003 +0000
    10.2 +++ b/xen-2.4.16/drivers/net/3c59x.c	Fri Feb 14 14:27:45 2003 +0000
    10.3 @@ -188,13 +188,15 @@
    10.4  /* "Knobs" that adjust features and parameters. */
    10.5  /* Set the copy breakpoint for the copy-only-tiny-frames scheme.
    10.6     Setting to > 1512 effectively disables this feature. */
    10.7 -#ifndef __arm__
    10.8 +/*#ifndef __arm__
    10.9  static const int rx_copybreak = 200;
   10.10 -#else
   10.11 +#else*/
   10.12  /* ARM systems perform better by disregarding the bus-master
   10.13     transfer capability of these cards. -- rmk */
   10.14 -static const int rx_copybreak = 1513;
   10.15 -#endif
   10.16 +/*static const int rx_copybreak = 1513;
   10.17 +#endif*/
   10.18 +static const int rx_copybreak = 0; /* Xen doesn't copybreak in drivers. */
   10.19 +
   10.20  /* Allow setting MTU to a larger size, bypassing the normal ethernet setup. */
   10.21  static const int mtu = 1500;
   10.22  /* Maximum events (Rx packets, etc.) to handle at each interrupt. */
    11.1 --- a/xen-2.4.16/drivers/net/8139cp.c	Fri Feb 14 13:18:19 2003 +0000
    11.2 +++ b/xen-2.4.16/drivers/net/8139cp.c	Fri Feb 14 14:27:45 2003 +0000
    11.3 @@ -78,13 +78,17 @@ MODULE_PARM (multicast_filter_limit, "i"
    11.4  MODULE_PARM_DESC (multicast_filter_limit, "8139cp maximum number of filtered multicast addresses");
    11.5  
    11.6  /* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */
    11.7 -#if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \
    11.8 +/*#if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \
    11.9          || defined(__sparc_) || defined(__ia64__) \
   11.10          || defined(__sh__) || defined(__mips__)
   11.11  static int rx_copybreak = 1518;
   11.12  #else
   11.13  static int rx_copybreak = 100;
   11.14 -#endif
   11.15 +#endif*/
   11.16 +
   11.17 +/* Xen doesn't do rx_copybreak in drivers. */
   11.18 +static int rx_copybreak = 0;
   11.19 +        
   11.20  MODULE_PARM (rx_copybreak, "i");
   11.21  MODULE_PARM_DESC (rx_copybreak, "8139cp Breakpoint at which Rx packets are copied");
   11.22  
    12.1 --- a/xen-2.4.16/drivers/net/8139too.c	Fri Feb 14 13:18:19 2003 +0000
    12.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.3 @@ -1,2324 +0,0 @@
    12.4 -
    12.5 -#define CONFIG_8139TOO_8129
    12.6 -
    12.7 -/*
    12.8 -
    12.9 -	8139too.c: A RealTek RTL-8139 Fast Ethernet driver for Linux.
   12.10 -
   12.11 -	Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
   12.12 -	Copyright 2000,2001 Jeff Garzik
   12.13 -
   12.14 -	Much code comes from Donald Becker's rtl8139.c driver,
   12.15 -	versions 1.13 and older.  This driver was originally based
   12.16 -	on rtl8139.c version 1.07.  Header of rtl8139.c version 1.13:
   12.17 -
   12.18 -	-----<snip>-----
   12.19 -
   12.20 -        	Written 1997-2001 by Donald Becker.
   12.21 -		This software may be used and distributed according to the
   12.22 -		terms of the GNU General Public License (GPL), incorporated
   12.23 -		herein by reference.  Drivers based on or derived from this
   12.24 -		code fall under the GPL and must retain the authorship,
   12.25 -		copyright and license notice.  This file is not a complete
   12.26 -		program and may only be used when the entire operating
   12.27 -		system is licensed under the GPL.
   12.28 -
   12.29 -		This driver is for boards based on the RTL8129 and RTL8139
   12.30 -		PCI ethernet chips.
   12.31 -
   12.32 -		The author may be reached as becker@scyld.com, or C/O Scyld
   12.33 -		Computing Corporation 410 Severn Ave., Suite 210 Annapolis
   12.34 -		MD 21403
   12.35 -
   12.36 -		Support and updates available at
   12.37 -		http://www.scyld.com/network/rtl8139.html
   12.38 -
   12.39 -		Twister-tuning table provided by Kinston
   12.40 -		<shangh@realtek.com.tw>.
   12.41 -
   12.42 -	-----<snip>-----
   12.43 -
   12.44 -	This software may be used and distributed according to the terms
   12.45 -	of the GNU General Public License, incorporated herein by reference.
   12.46 -
   12.47 -	Contributors:
   12.48 -
   12.49 -		Donald Becker - he wrote the original driver, kudos to him!
   12.50 -		(but please don't e-mail him for support, this isn't his driver)
   12.51 -
   12.52 -		Tigran Aivazian - bug fixes, skbuff free cleanup
   12.53 -
   12.54 -		Martin Mares - suggestions for PCI cleanup
   12.55 -
   12.56 -		David S. Miller - PCI DMA and softnet updates
   12.57 -
   12.58 -		Ernst Gill - fixes ported from BSD driver
   12.59 -
   12.60 -		Daniel Kobras - identified specific locations of
   12.61 -			posted MMIO write bugginess
   12.62 -
   12.63 -		Gerard Sharp - bug fix, testing and feedback
   12.64 -
   12.65 -		David Ford - Rx ring wrap fix
   12.66 -
   12.67 -		Dan DeMaggio - swapped RTL8139 cards with me, and allowed me
   12.68 -		to find and fix a crucial bug on older chipsets.
   12.69 -
   12.70 -		Donald Becker/Chris Butterworth/Marcus Westergren -
   12.71 -		Noticed various Rx packet size-related buglets.
   12.72 -
   12.73 -		Santiago Garcia Mantinan - testing and feedback
   12.74 -
   12.75 -		Jens David - 2.2.x kernel backports
   12.76 -
   12.77 -		Martin Dennett - incredibly helpful insight on undocumented
   12.78 -		features of the 8139 chips
   12.79 -
   12.80 -		Jean-Jacques Michel - bug fix
   12.81 -
   12.82 -		Tobias Ringström - Rx interrupt status checking suggestion
   12.83 -
   12.84 -		Andrew Morton - Clear blocked signals, avoid
   12.85 -		buffer overrun setting current->comm.
   12.86 -
   12.87 -		Kalle Olavi Niemitalo - Wake-on-LAN ioctls
   12.88 -
   12.89 -		Robert Kuebel - Save kernel thread from dying on any signal.
   12.90 -
   12.91 -	Submitting bug reports:
   12.92 -
   12.93 -		"rtl8139-diag -mmmaaavvveefN" output
   12.94 -		enable RTL8139_DEBUG below, and look at 'dmesg' or kernel log
   12.95 -
   12.96 -		See 8139too.txt for more details.
   12.97 -
   12.98 -*/
   12.99 -
  12.100 -#define DRV_NAME	"8139too"
  12.101 -#define DRV_VERSION	"0.9.22"
  12.102 -
  12.103 -
  12.104 -#include <linux/config.h>
  12.105 -#include <linux/module.h>
  12.106 -//#include <linux/kernel.h>
  12.107 -//#include <linux/compiler.h>
  12.108 -#include <linux/pci.h>
  12.109 -#include <linux/init.h>
  12.110 -#include <linux/ioport.h>
  12.111 -#include <linux/netdevice.h>
  12.112 -#include <linux/etherdevice.h>
  12.113 -//#include <linux/rtnetlink.h>
  12.114 -#include <linux/delay.h>
  12.115 -#include <linux/ethtool.h>
  12.116 -#include <linux/mii.h>
  12.117 -//#include <linux/completion.h>
  12.118 -#include <asm/io.h>
  12.119 -#include <asm/uaccess.h>
  12.120 -
  12.121 -#define RTL8139_DRIVER_NAME   DRV_NAME " Fast Ethernet driver " DRV_VERSION
  12.122 -#define PFX DRV_NAME ": "
  12.123 -
  12.124 -
  12.125 -/* enable PIO instead of MMIO, if CONFIG_8139TOO_PIO is selected */
  12.126 -#ifdef CONFIG_8139TOO_PIO
  12.127 -#define USE_IO_OPS 1
  12.128 -#endif
  12.129 -
  12.130 -/* define to 1 to enable copious debugging info */
  12.131 -#undef RTL8139_DEBUG
  12.132 -
  12.133 -/* define to 1 to disable lightweight runtime debugging checks */
  12.134 -#undef RTL8139_NDEBUG
  12.135 -
  12.136 -
  12.137 -#ifdef RTL8139_DEBUG
  12.138 -/* note: prints function name for you */
  12.139 -#  define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __FUNCTION__ , ## args)
  12.140 -#else
  12.141 -#  define DPRINTK(fmt, args...)
  12.142 -#endif
  12.143 -
  12.144 -#ifdef RTL8139_NDEBUG
  12.145 -#  define assert(expr) do {} while (0)
  12.146 -#else
  12.147 -#  define assert(expr) \
  12.148 -        if(!(expr)) {					\
  12.149 -        printk( "Assertion failed! %s,%s,%s,line=%d\n",	\
  12.150 -        #expr,__FILE__,__FUNCTION__,__LINE__);		\
  12.151 -        }
  12.152 -#endif
  12.153 -
  12.154 -
  12.155 -/* A few user-configurable values. */
  12.156 -/* media options */
  12.157 -#define MAX_UNITS 8
  12.158 -static int media[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
  12.159 -static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
  12.160 -
  12.161 -/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
  12.162 -static int max_interrupt_work = 20;
  12.163 -
  12.164 -/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
  12.165 -   The RTL chips use a 64 element hash table based on the Ethernet CRC.  */
  12.166 -static int multicast_filter_limit = 32;
  12.167 -
  12.168 -/* Size of the in-memory receive ring. */
  12.169 -#define RX_BUF_LEN_IDX	2	/* 0==8K, 1==16K, 2==32K, 3==64K */
  12.170 -#define RX_BUF_LEN	(8192 << RX_BUF_LEN_IDX)
  12.171 -#define RX_BUF_PAD	16
  12.172 -#define RX_BUF_WRAP_PAD 2048 /* spare padding to handle lack of packet wrap */
  12.173 -#define RX_BUF_TOT_LEN	(RX_BUF_LEN + RX_BUF_PAD + RX_BUF_WRAP_PAD)
  12.174 -
  12.175 -/* Number of Tx descriptor registers. */
  12.176 -#define NUM_TX_DESC	4
  12.177 -
  12.178 -/* max supported ethernet frame size -- must be at least (dev->mtu+14+4).*/
  12.179 -#define MAX_ETH_FRAME_SIZE	1536
  12.180 -
  12.181 -/* Size of the Tx bounce buffers -- must be at least (dev->mtu+14+4). */
  12.182 -#define TX_BUF_SIZE	MAX_ETH_FRAME_SIZE
  12.183 -#define TX_BUF_TOT_LEN	(TX_BUF_SIZE * NUM_TX_DESC)
  12.184 -
  12.185 -/* PCI Tuning Parameters
  12.186 -   Threshold is bytes transferred to chip before transmission starts. */
  12.187 -#define TX_FIFO_THRESH 256	/* In bytes, rounded down to 32 byte units. */
  12.188 -
  12.189 -/* The following settings are log_2(bytes)-4:  0 == 16 bytes .. 6==1024, 7==end of packet. */
  12.190 -#define RX_FIFO_THRESH	7	/* Rx buffer level before first PCI xfer.  */
  12.191 -#define RX_DMA_BURST	7	/* Maximum PCI burst, '6' is 1024 */
  12.192 -#define TX_DMA_BURST	6	/* Maximum PCI burst, '6' is 1024 */
  12.193 -#define TX_RETRY	8	/* 0-15.  retries = 16 + (TX_RETRY * 16) */
  12.194 -
  12.195 -/* Operational parameters that usually are not changed. */
  12.196 -/* Time in jiffies before concluding the transmitter is hung. */
  12.197 -#define TX_TIMEOUT  (6*HZ)
  12.198 -
  12.199 -
  12.200 -enum {
  12.201 -	HAS_MII_XCVR = 0x010000,
  12.202 -	HAS_CHIP_XCVR = 0x020000,
  12.203 -	HAS_LNK_CHNG = 0x040000,
  12.204 -};
  12.205 -
  12.206 -#define RTL_MIN_IO_SIZE 0x80
  12.207 -#define RTL8139B_IO_SIZE 256
  12.208 -
  12.209 -#define RTL8129_CAPS	HAS_MII_XCVR
  12.210 -#define RTL8139_CAPS	HAS_CHIP_XCVR|HAS_LNK_CHNG
  12.211 -
  12.212 -typedef enum {
  12.213 -	RTL8139 = 0,
  12.214 -	RTL8139_CB,
  12.215 -	SMC1211TX,
  12.216 -	/*MPX5030,*/
  12.217 -	DELTA8139,
  12.218 -	ADDTRON8139,
  12.219 -	DFE538TX,
  12.220 -	DFE690TXD,
  12.221 -	RTL8129,
  12.222 -} board_t;
  12.223 -
  12.224 -
  12.225 -/* indexed by board_t, above */
  12.226 -static struct {
  12.227 -	const char *name;
  12.228 -	u32 hw_flags;
  12.229 -} board_info[] __devinitdata = {
  12.230 -	{ "RealTek RTL8139 Fast Ethernet", RTL8139_CAPS },
  12.231 -	{ "RealTek RTL8139B PCI/CardBus", RTL8139_CAPS },
  12.232 -	{ "SMC1211TX EZCard 10/100 (RealTek RTL8139)", RTL8139_CAPS },
  12.233 -/*	{ MPX5030, "Accton MPX5030 (RealTek RTL8139)", RTL8139_CAPS },*/
  12.234 -	{ "Delta Electronics 8139 10/100BaseTX", RTL8139_CAPS },
  12.235 -	{ "Addtron Technolgy 8139 10/100BaseTX", RTL8139_CAPS },
  12.236 -	{ "D-Link DFE-538TX (RealTek RTL8139)", RTL8139_CAPS },
  12.237 -	{ "D-Link DFE-690TXD (RealTek RTL8139)", RTL8139_CAPS },
  12.238 -	{ "RealTek RTL8129", RTL8129_CAPS },
  12.239 -};
  12.240 -
  12.241 -
  12.242 -static struct pci_device_id rtl8139_pci_tbl[] __devinitdata = {
  12.243 -	{0x10ec, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
  12.244 -	{0x10ec, 0x8138, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139_CB },
  12.245 -	{0x1113, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, SMC1211TX },
  12.246 -/*	{0x1113, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MPX5030 },*/
  12.247 -	{0x1500, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DELTA8139 },
  12.248 -	{0x4033, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ADDTRON8139 },
  12.249 -	{0x1186, 0x1300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DFE538TX },
  12.250 -	{0x1186, 0x1340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DFE690TXD },
  12.251 -
  12.252 -#ifdef CONFIG_8139TOO_8129
  12.253 -	{0x10ec, 0x8129, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8129 },
  12.254 -#endif
  12.255 -
  12.256 -	/* some crazy cards report invalid vendor ids like
  12.257 -	 * 0x0001 here.  The other ids are valid and constant,
  12.258 -	 * so we simply don't match on the main vendor id.
  12.259 -	 */
  12.260 -	{PCI_ANY_ID, 0x8139, 0x10ec, 0x8139, 0, 0, RTL8139 },
  12.261 -	{PCI_ANY_ID, 0x8139, 0x1186, 0x1300, 0, 0, DFE538TX },
  12.262 -
  12.263 -	{0,}
  12.264 -};
  12.265 -MODULE_DEVICE_TABLE (pci, rtl8139_pci_tbl);
  12.266 -
  12.267 -
  12.268 -/* The rest of these values should never change. */
  12.269 -
  12.270 -/* Symbolic offsets to registers. */
  12.271 -enum RTL8139_registers {
  12.272 -	MAC0 = 0,		/* Ethernet hardware address. */
  12.273 -	MAR0 = 8,		/* Multicast filter. */
  12.274 -	TxStatus0 = 0x10,	/* Transmit status (Four 32bit registers). */
  12.275 -	TxAddr0 = 0x20,		/* Tx descriptors (also four 32bit). */
  12.276 -	RxBuf = 0x30,
  12.277 -	ChipCmd = 0x37,
  12.278 -	RxBufPtr = 0x38,
  12.279 -	RxBufAddr = 0x3A,
  12.280 -	IntrMask = 0x3C,
  12.281 -	IntrStatus = 0x3E,
  12.282 -	TxConfig = 0x40,
  12.283 -	ChipVersion = 0x43,
  12.284 -	RxConfig = 0x44,
  12.285 -	Timer = 0x48,		/* A general-purpose counter. */
  12.286 -	RxMissed = 0x4C,	/* 24 bits valid, write clears. */
  12.287 -	Cfg9346 = 0x50,
  12.288 -	Config0 = 0x51,
  12.289 -	Config1 = 0x52,
  12.290 -	FlashReg = 0x54,
  12.291 -	MediaStatus = 0x58,
  12.292 -	Config3 = 0x59,
  12.293 -	Config4 = 0x5A,		/* absent on RTL-8139A */
  12.294 -	HltClk = 0x5B,
  12.295 -	MultiIntr = 0x5C,
  12.296 -	TxSummary = 0x60,
  12.297 -	BasicModeCtrl = 0x62,
  12.298 -	BasicModeStatus = 0x64,
  12.299 -	NWayAdvert = 0x66,
  12.300 -	NWayLPAR = 0x68,
  12.301 -	NWayExpansion = 0x6A,
  12.302 -	/* Undocumented registers, but required for proper operation. */
  12.303 -	FIFOTMS = 0x70,		/* FIFO Control and test. */
  12.304 -	CSCR = 0x74,		/* Chip Status and Configuration Register. */
  12.305 -	PARA78 = 0x78,
  12.306 -	PARA7c = 0x7c,		/* Magic transceiver parameter register. */
  12.307 -	Config5 = 0xD8,		/* absent on RTL-8139A */
  12.308 -};
  12.309 -
  12.310 -enum ClearBitMasks {
  12.311 -	MultiIntrClear = 0xF000,
  12.312 -	ChipCmdClear = 0xE2,
  12.313 -	Config1Clear = (1<<7)|(1<<6)|(1<<3)|(1<<2)|(1<<1),
  12.314 -};
  12.315 -
  12.316 -enum ChipCmdBits {
  12.317 -	CmdReset = 0x10,
  12.318 -	CmdRxEnb = 0x08,
  12.319 -	CmdTxEnb = 0x04,
  12.320 -	RxBufEmpty = 0x01,
  12.321 -};
  12.322 -
  12.323 -/* Interrupt register bits, using my own meaningful names. */
  12.324 -enum IntrStatusBits {
  12.325 -	PCIErr = 0x8000,
  12.326 -	PCSTimeout = 0x4000,
  12.327 -	RxFIFOOver = 0x40,
  12.328 -	RxUnderrun = 0x20,
  12.329 -	RxOverflow = 0x10,
  12.330 -	TxErr = 0x08,
  12.331 -	TxOK = 0x04,
  12.332 -	RxErr = 0x02,
  12.333 -	RxOK = 0x01,
  12.334 -
  12.335 -	RxAckBits = RxFIFOOver | RxOverflow | RxOK,
  12.336 -};
  12.337 -
  12.338 -enum TxStatusBits {
  12.339 -	TxHostOwns = 0x2000,
  12.340 -	TxUnderrun = 0x4000,
  12.341 -	TxStatOK = 0x8000,
  12.342 -	TxOutOfWindow = 0x20000000,
  12.343 -	TxAborted = 0x40000000,
  12.344 -	TxCarrierLost = 0x80000000,
  12.345 -};
  12.346 -enum RxStatusBits {
  12.347 -	RxMulticast = 0x8000,
  12.348 -	RxPhysical = 0x4000,
  12.349 -	RxBroadcast = 0x2000,
  12.350 -	RxBadSymbol = 0x0020,
  12.351 -	RxRunt = 0x0010,
  12.352 -	RxTooLong = 0x0008,
  12.353 -	RxCRCErr = 0x0004,
  12.354 -	RxBadAlign = 0x0002,
  12.355 -	RxStatusOK = 0x0001,
  12.356 -};
  12.357 -
  12.358 -/* Bits in RxConfig. */
  12.359 -enum rx_mode_bits {
  12.360 -	AcceptErr = 0x20,
  12.361 -	AcceptRunt = 0x10,
  12.362 -	AcceptBroadcast = 0x08,
  12.363 -	AcceptMulticast = 0x04,
  12.364 -	AcceptMyPhys = 0x02,
  12.365 -	AcceptAllPhys = 0x01,
  12.366 -};
  12.367 -
  12.368 -/* Bits in TxConfig. */
  12.369 -enum tx_config_bits {
  12.370 -	TxIFG1 = (1 << 25),	/* Interframe Gap Time */
  12.371 -	TxIFG0 = (1 << 24),	/* Enabling these bits violates IEEE 802.3 */
  12.372 -	TxLoopBack = (1 << 18) | (1 << 17), /* enable loopback test mode */
  12.373 -	TxCRC = (1 << 16),	/* DISABLE appending CRC to end of Tx packets */
  12.374 -	TxClearAbt = (1 << 0),	/* Clear abort (WO) */
  12.375 -	TxDMAShift = 8,		/* DMA burst value (0-7) is shifted this many bits */
  12.376 -	TxRetryShift = 4,	/* TXRR value (0-15) is shifted this many bits */
  12.377 -
  12.378 -	TxVersionMask = 0x7C800000, /* mask out version bits 30-26, 23 */
  12.379 -};
  12.380 -
  12.381 -/* Bits in Config1 */
  12.382 -enum Config1Bits {
  12.383 -	Cfg1_PM_Enable = 0x01,
  12.384 -	Cfg1_VPD_Enable = 0x02,
  12.385 -	Cfg1_PIO = 0x04,
  12.386 -	Cfg1_MMIO = 0x08,
  12.387 -	LWAKE = 0x10,		/* not on 8139, 8139A */
  12.388 -	Cfg1_Driver_Load = 0x20,
  12.389 -	Cfg1_LED0 = 0x40,
  12.390 -	Cfg1_LED1 = 0x80,
  12.391 -	SLEEP = (1 << 1),	/* only on 8139, 8139A */
  12.392 -	PWRDN = (1 << 0),	/* only on 8139, 8139A */
  12.393 -};
  12.394 -
  12.395 -/* Bits in Config3 */
  12.396 -enum Config3Bits {
  12.397 -	Cfg3_FBtBEn    = (1 << 0), /* 1 = Fast Back to Back */
  12.398 -	Cfg3_FuncRegEn = (1 << 1), /* 1 = enable CardBus Function registers */
  12.399 -	Cfg3_CLKRUN_En = (1 << 2), /* 1 = enable CLKRUN */
  12.400 -	Cfg3_CardB_En  = (1 << 3), /* 1 = enable CardBus registers */
  12.401 -	Cfg3_LinkUp    = (1 << 4), /* 1 = wake up on link up */
  12.402 -	Cfg3_Magic     = (1 << 5), /* 1 = wake up on Magic Packet (tm) */
  12.403 -	Cfg3_PARM_En   = (1 << 6), /* 0 = software can set twister parameters */
  12.404 -	Cfg3_GNTSel    = (1 << 7), /* 1 = delay 1 clock from PCI GNT signal */
  12.405 -};
  12.406 -
  12.407 -/* Bits in Config4 */
  12.408 -enum Config4Bits {
  12.409 -	LWPTN = (1 << 2),	/* not on 8139, 8139A */
  12.410 -};
  12.411 -
  12.412 -/* Bits in Config5 */
  12.413 -enum Config5Bits {
  12.414 -	Cfg5_PME_STS     = (1 << 0), /* 1 = PCI reset resets PME_Status */
  12.415 -	Cfg5_LANWake     = (1 << 1), /* 1 = enable LANWake signal */
  12.416 -	Cfg5_LDPS        = (1 << 2), /* 0 = save power when link is down */
  12.417 -	Cfg5_FIFOAddrPtr = (1 << 3), /* Realtek internal SRAM testing */
  12.418 -	Cfg5_UWF         = (1 << 4), /* 1 = accept unicast wakeup frame */
  12.419 -	Cfg5_MWF         = (1 << 5), /* 1 = accept multicast wakeup frame */
  12.420 -	Cfg5_BWF         = (1 << 6), /* 1 = accept broadcast wakeup frame */
  12.421 -};
  12.422 -
  12.423 -enum RxConfigBits {
  12.424 -	/* rx fifo threshold */
  12.425 -	RxCfgFIFOShift = 13,
  12.426 -	RxCfgFIFONone = (7 << RxCfgFIFOShift),
  12.427 -
  12.428 -	/* Max DMA burst */
  12.429 -	RxCfgDMAShift = 8,
  12.430 -	RxCfgDMAUnlimited = (7 << RxCfgDMAShift),
  12.431 -
  12.432 -	/* rx ring buffer length */
  12.433 -	RxCfgRcv8K = 0,
  12.434 -	RxCfgRcv16K = (1 << 11),
  12.435 -	RxCfgRcv32K = (1 << 12),
  12.436 -	RxCfgRcv64K = (1 << 11) | (1 << 12),
  12.437 -
  12.438 -	/* Disable packet wrap at end of Rx buffer */
  12.439 -	RxNoWrap = (1 << 7),
  12.440 -};
  12.441 -
  12.442 -
  12.443 -/* Twister tuning parameters from RealTek.
  12.444 -   Completely undocumented, but required to tune bad links. */
  12.445 -enum CSCRBits {
  12.446 -	CSCR_LinkOKBit = 0x0400,
  12.447 -	CSCR_LinkChangeBit = 0x0800,
  12.448 -	CSCR_LinkStatusBits = 0x0f000,
  12.449 -	CSCR_LinkDownOffCmd = 0x003c0,
  12.450 -	CSCR_LinkDownCmd = 0x0f3c0,
  12.451 -};
  12.452 -
  12.453 -
  12.454 -enum Cfg9346Bits {
  12.455 -	Cfg9346_Lock = 0x00,
  12.456 -	Cfg9346_Unlock = 0xC0,
  12.457 -};
  12.458 -
  12.459 -
  12.460 -#define PARA78_default	0x78fa8388
  12.461 -#define PARA7c_default	0xcb38de43	/* param[0][3] */
  12.462 -#define PARA7c_xxx		0xcb38de43
  12.463 -static const unsigned long param[4][4] = {
  12.464 -	{0xcb39de43, 0xcb39ce43, 0xfb38de03, 0xcb38de43},
  12.465 -	{0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83},
  12.466 -	{0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83},
  12.467 -	{0xbb39de43, 0xbb39ce43, 0xbb39ce83, 0xbb39ce83}
  12.468 -};
  12.469 -
  12.470 -typedef enum {
  12.471 -	CH_8139 = 0,
  12.472 -	CH_8139_K,
  12.473 -	CH_8139A,
  12.474 -	CH_8139B,
  12.475 -	CH_8130,
  12.476 -	CH_8139C,
  12.477 -} chip_t;
  12.478 -
  12.479 -enum chip_flags {
  12.480 -	HasHltClk = (1 << 0),
  12.481 -	HasLWake = (1 << 1),
  12.482 -};
  12.483 -
  12.484 -
  12.485 -/* directly indexed by chip_t, above */
  12.486 -const static struct {
  12.487 -	const char *name;
  12.488 -	u8 version; /* from RTL8139C docs */
  12.489 -	u32 RxConfigMask; /* should clear the bits supported by this chip */
  12.490 -	u32 flags;
  12.491 -} rtl_chip_info[] = {
  12.492 -	{ "RTL-8139",
  12.493 -	  0x40,
  12.494 -	  0xf0fe0040, /* XXX copied from RTL8139A, verify */
  12.495 -	  HasHltClk,
  12.496 -	},
  12.497 -
  12.498 -	{ "RTL-8139 rev K",
  12.499 -	  0x60,
  12.500 -	  0xf0fe0040,
  12.501 -	  HasHltClk,
  12.502 -	},
  12.503 -
  12.504 -	{ "RTL-8139A",
  12.505 -	  0x70,
  12.506 -	  0xf0fe0040,
  12.507 -	  HasHltClk, /* XXX undocumented? */
  12.508 -	},
  12.509 -
  12.510 -	{ "RTL-8139B",
  12.511 -	  0x78,
  12.512 -	  0xf0fc0040,
  12.513 -	  HasLWake,
  12.514 -	},
  12.515 -
  12.516 -	{ "RTL-8130",
  12.517 -	  0x7C,
  12.518 -	  0xf0fe0040, /* XXX copied from RTL8139A, verify */
  12.519 -	  HasLWake,
  12.520 -	},
  12.521 -
  12.522 -	{ "RTL-8139C",
  12.523 -	  0x74,
  12.524 -	  0xf0fc0040, /* XXX copied from RTL8139B, verify */
  12.525 -	  HasLWake,
  12.526 -	},
  12.527 -
  12.528 -};
  12.529 -
  12.530 -struct rtl_extra_stats {
  12.531 -	unsigned long early_rx;
  12.532 -	unsigned long tx_buf_mapped;
  12.533 -	unsigned long tx_timeouts;
  12.534 -};
  12.535 -
  12.536 -struct rtl8139_private {
  12.537 -	void *mmio_addr;
  12.538 -	int drv_flags;
  12.539 -	struct pci_dev *pci_dev;
  12.540 -	struct net_device_stats stats;
  12.541 -	unsigned char *rx_ring;
  12.542 -	unsigned int cur_rx;	/* Index into the Rx buffer of next Rx pkt. */
  12.543 -	unsigned int tx_flag;
  12.544 -	unsigned long cur_tx;
  12.545 -	unsigned long dirty_tx;
  12.546 -	unsigned char *tx_buf[NUM_TX_DESC];	/* Tx bounce buffers */
  12.547 -	unsigned char *tx_bufs;	/* Tx bounce buffer region. */
  12.548 -	dma_addr_t rx_ring_dma;
  12.549 -	dma_addr_t tx_bufs_dma;
  12.550 -	signed char phys[4];		/* MII device addresses. */
  12.551 -	char twistie, twist_row, twist_col;	/* Twister tune state. */
  12.552 -	unsigned int full_duplex:1;	/* Full-duplex operation requested. */
  12.553 -	unsigned int duplex_lock:1;
  12.554 -	unsigned int default_port:4;	/* Last dev->if_port value. */
  12.555 -	unsigned int media2:4;	/* Secondary monitored media port. */
  12.556 -	unsigned int medialock:1;	/* Don't sense media type. */
  12.557 -	unsigned int mediasense:1;	/* Media sensing in progress. */
  12.558 -	spinlock_t lock;
  12.559 -	chip_t chipset;
  12.560 -	u32 rx_config;
  12.561 -	struct rtl_extra_stats xstats;
  12.562 -	int time_to_die;
  12.563 -};
  12.564 -
  12.565 -MODULE_AUTHOR ("Jeff Garzik <jgarzik@mandrakesoft.com>");
  12.566 -MODULE_DESCRIPTION ("RealTek RTL-8139 Fast Ethernet driver");
  12.567 -MODULE_LICENSE("GPL");
  12.568 -
  12.569 -MODULE_PARM (multicast_filter_limit, "i");
  12.570 -MODULE_PARM (max_interrupt_work, "i");
  12.571 -MODULE_PARM (media, "1-" __MODULE_STRING(MAX_UNITS) "i");
  12.572 -MODULE_PARM (full_duplex, "1-" __MODULE_STRING(MAX_UNITS) "i");
  12.573 -MODULE_PARM_DESC (multicast_filter_limit, "8139too maximum number of filtered multicast addresses");
  12.574 -MODULE_PARM_DESC (max_interrupt_work, "8139too maximum events handled per interrupt");
  12.575 -MODULE_PARM_DESC (media, "8139too: Bits 4+9: force full duplex, bit 5: 100Mbps");
  12.576 -MODULE_PARM_DESC (full_duplex, "8139too: Force full duplex for board(s) (1)");
  12.577 -
  12.578 -static int read_eeprom (void *ioaddr, int location, int addr_len);
  12.579 -static int rtl8139_open (struct net_device *dev);
  12.580 -static int mdio_read (struct net_device *dev, int phy_id, int location);
  12.581 -static void mdio_write (struct net_device *dev, int phy_id, int location,
  12.582 -			int val);
  12.583 -static void rtl8139_tx_timeout (struct net_device *dev);
  12.584 -static void rtl8139_init_ring (struct net_device *dev);
  12.585 -static int rtl8139_start_xmit (struct sk_buff *skb,
  12.586 -			       struct net_device *dev);
  12.587 -static void rtl8139_interrupt (int irq, void *dev_instance,
  12.588 -			       struct pt_regs *regs);
  12.589 -static int rtl8139_close (struct net_device *dev);
  12.590 -static int netdev_ioctl (struct net_device *dev, struct ifreq *rq, int cmd);
  12.591 -static struct net_device_stats *rtl8139_get_stats (struct net_device *dev);
  12.592 -static inline u32 ether_crc (int length, unsigned char *data);
  12.593 -static void rtl8139_set_rx_mode (struct net_device *dev);
  12.594 -static void __set_rx_mode (struct net_device *dev);
  12.595 -static void rtl8139_hw_start (struct net_device *dev);
  12.596 -
  12.597 -#ifdef USE_IO_OPS
  12.598 -
  12.599 -#define RTL_R8(reg)		inb (((unsigned long)ioaddr) + (reg))
  12.600 -#define RTL_R16(reg)		inw (((unsigned long)ioaddr) + (reg))
  12.601 -#define RTL_R32(reg)		((unsigned long) inl (((unsigned long)ioaddr) + (reg)))
  12.602 -#define RTL_W8(reg, val8)	outb ((val8), ((unsigned long)ioaddr) + (reg))
  12.603 -#define RTL_W16(reg, val16)	outw ((val16), ((unsigned long)ioaddr) + (reg))
  12.604 -#define RTL_W32(reg, val32)	outl ((val32), ((unsigned long)ioaddr) + (reg))
  12.605 -#define RTL_W8_F		RTL_W8
  12.606 -#define RTL_W16_F		RTL_W16
  12.607 -#define RTL_W32_F		RTL_W32
  12.608 -#undef readb
  12.609 -#undef readw
  12.610 -#undef readl
  12.611 -#undef writeb
  12.612 -#undef writew
  12.613 -#undef writel
  12.614 -#define readb(addr) inb((unsigned long)(addr))
  12.615 -#define readw(addr) inw((unsigned long)(addr))
  12.616 -#define readl(addr) inl((unsigned long)(addr))
  12.617 -#define writeb(val,addr) outb((val),(unsigned long)(addr))
  12.618 -#define writew(val,addr) outw((val),(unsigned long)(addr))
  12.619 -#define writel(val,addr) outl((val),(unsigned long)(addr))
  12.620 -
  12.621 -#else
  12.622 -
  12.623 -/* write MMIO register, with flush */
  12.624 -/* Flush avoids rtl8139 bug w/ posted MMIO writes */
  12.625 -#define RTL_W8_F(reg, val8)	do { writeb ((val8), ioaddr + (reg)); readb (ioaddr + (reg)); } while (0)
  12.626 -#define RTL_W16_F(reg, val16)	do { writew ((val16), ioaddr + (reg)); readw (ioaddr + (reg)); } while (0)
  12.627 -#define RTL_W32_F(reg, val32)	do { writel ((val32), ioaddr + (reg)); readl (ioaddr + (reg)); } while (0)
  12.628 -
  12.629 -
  12.630 -#define MMIO_FLUSH_AUDIT_COMPLETE 1
  12.631 -#if MMIO_FLUSH_AUDIT_COMPLETE
  12.632 -
  12.633 -/* write MMIO register */
  12.634 -#define RTL_W8(reg, val8)	writeb ((val8), ioaddr + (reg))
  12.635 -#define RTL_W16(reg, val16)	writew ((val16), ioaddr + (reg))
  12.636 -#define RTL_W32(reg, val32)	writel ((val32), ioaddr + (reg))
  12.637 -
  12.638 -#else
  12.639 -
  12.640 -/* write MMIO register, then flush */
  12.641 -#define RTL_W8		RTL_W8_F
  12.642 -#define RTL_W16		RTL_W16_F
  12.643 -#define RTL_W32		RTL_W32_F
  12.644 -
  12.645 -#endif /* MMIO_FLUSH_AUDIT_COMPLETE */
  12.646 -
  12.647 -/* read MMIO register */
  12.648 -#define RTL_R8(reg)		readb (ioaddr + (reg))
  12.649 -#define RTL_R16(reg)		readw (ioaddr + (reg))
  12.650 -#define RTL_R32(reg)		((unsigned long) readl (ioaddr + (reg)))
  12.651 -
  12.652 -#endif /* USE_IO_OPS */
  12.653 -
  12.654 -
  12.655 -static const u16 rtl8139_intr_mask =
  12.656 -	PCIErr | PCSTimeout | RxUnderrun | RxOverflow | RxFIFOOver |
  12.657 -	TxErr | TxOK | RxErr | RxOK;
  12.658 -
  12.659 -static const unsigned int rtl8139_rx_config =
  12.660 -	RxCfgRcv32K | RxNoWrap |
  12.661 -	(RX_FIFO_THRESH << RxCfgFIFOShift) |
  12.662 -	(RX_DMA_BURST << RxCfgDMAShift);
  12.663 -
  12.664 -static const unsigned int rtl8139_tx_config =
  12.665 -	(TX_DMA_BURST << TxDMAShift) | (TX_RETRY << TxRetryShift);
  12.666 -
  12.667 -static void __rtl8139_cleanup_dev (struct net_device *dev)
  12.668 -{
  12.669 -	struct rtl8139_private *tp;
  12.670 -	struct pci_dev *pdev;
  12.671 -
  12.672 -	assert (dev != NULL);
  12.673 -	assert (dev->priv != NULL);
  12.674 -
  12.675 -	tp = dev->priv;
  12.676 -	assert (tp->pci_dev != NULL);
  12.677 -	pdev = tp->pci_dev;
  12.678 -
  12.679 -#ifndef USE_IO_OPS
  12.680 -	if (tp->mmio_addr)
  12.681 -		iounmap (tp->mmio_addr);
  12.682 -#endif /* !USE_IO_OPS */
  12.683 -
  12.684 -	/* it's ok to call this even if we have no regions to free */
  12.685 -	pci_release_regions (pdev);
  12.686 -
  12.687 -#ifndef RTL8139_NDEBUG
  12.688 -	/* poison memory before freeing */
  12.689 -	memset (dev, 0xBC,
  12.690 -		sizeof (struct net_device) +
  12.691 -		sizeof (struct rtl8139_private));
  12.692 -#endif /* RTL8139_NDEBUG */
  12.693 -
  12.694 -	kfree (dev);
  12.695 -
  12.696 -	pci_set_drvdata (pdev, NULL);
  12.697 -}
  12.698 -
  12.699 -
  12.700 -static void rtl8139_chip_reset (void *ioaddr)
  12.701 -{
  12.702 -	int i;
  12.703 -
  12.704 -	/* Soft reset the chip. */
  12.705 -	RTL_W8 (ChipCmd, CmdReset);
  12.706 -
  12.707 -	/* Check that the chip has finished the reset. */
  12.708 -	for (i = 1000; i > 0; i--) {
  12.709 -		barrier();
  12.710 -		if ((RTL_R8 (ChipCmd) & CmdReset) == 0)
  12.711 -			break;
  12.712 -		udelay (10);
  12.713 -	}
  12.714 -}
  12.715 -
  12.716 -
  12.717 -static int __devinit rtl8139_init_board (struct pci_dev *pdev,
  12.718 -					 struct net_device **dev_out)
  12.719 -{
  12.720 -	void *ioaddr;
  12.721 -	struct net_device *dev;
  12.722 -	struct rtl8139_private *tp;
  12.723 -	u8 tmp8;
  12.724 -	int rc;
  12.725 -	unsigned int i;
  12.726 -	u32 pio_start, pio_end, pio_flags, pio_len;
  12.727 -	unsigned long mmio_start, mmio_end, mmio_flags, mmio_len;
  12.728 -	u32 tmp;
  12.729 -
  12.730 -	assert (pdev != NULL);
  12.731 -
  12.732 -	*dev_out = NULL;
  12.733 -
  12.734 -	/* dev and dev->priv zeroed in alloc_etherdev */
  12.735 -	dev = alloc_etherdev (sizeof (*tp));
  12.736 -	if (dev == NULL) {
  12.737 -		printk (KERN_ERR PFX "%s: Unable to alloc new net device\n", pdev->slot_name);
  12.738 -		return -ENOMEM;
  12.739 -	}
  12.740 -	SET_MODULE_OWNER(dev);
  12.741 -	tp = dev->priv;
  12.742 -	tp->pci_dev = pdev;
  12.743 -
  12.744 -	/* enable device (incl. PCI PM wakeup and hotplug setup) */
  12.745 -	rc = pci_enable_device (pdev);
  12.746 -	if (rc)
  12.747 -		goto err_out;
  12.748 -
  12.749 -	pio_start = pci_resource_start (pdev, 0);
  12.750 -	pio_end = pci_resource_end (pdev, 0);
  12.751 -	pio_flags = pci_resource_flags (pdev, 0);
  12.752 -	pio_len = pci_resource_len (pdev, 0);
  12.753 -
  12.754 -	mmio_start = pci_resource_start (pdev, 1);
  12.755 -	mmio_end = pci_resource_end (pdev, 1);
  12.756 -	mmio_flags = pci_resource_flags (pdev, 1);
  12.757 -	mmio_len = pci_resource_len (pdev, 1);
  12.758 -
  12.759 -	/* set this immediately, we need to know before
  12.760 -	 * we talk to the chip directly */
  12.761 -	DPRINTK("PIO region size == 0x%02X\n", pio_len);
  12.762 -	DPRINTK("MMIO region size == 0x%02lX\n", mmio_len);
  12.763 -
  12.764 -#ifdef USE_IO_OPS
  12.765 -	/* make sure PCI base addr 0 is PIO */
  12.766 -	if (!(pio_flags & IORESOURCE_IO)) {
  12.767 -		printk (KERN_ERR PFX "%s: region #0 not a PIO resource, aborting\n", pdev->slot_name);
  12.768 -		rc = -ENODEV;
  12.769 -		goto err_out;
  12.770 -	}
  12.771 -	/* check for weird/broken PCI region reporting */
  12.772 -	if (pio_len < RTL_MIN_IO_SIZE) {
  12.773 -		printk (KERN_ERR PFX "%s: Invalid PCI I/O region size(s), aborting\n", pdev->slot_name);
  12.774 -		rc = -ENODEV;
  12.775 -		goto err_out;
  12.776 -	}
  12.777 -#else
  12.778 -	/* make sure PCI base addr 1 is MMIO */
  12.779 -	if (!(mmio_flags & IORESOURCE_MEM)) {
  12.780 -		printk (KERN_ERR PFX "%s: region #1 not an MMIO resource, aborting\n", pdev->slot_name);
  12.781 -		rc = -ENODEV;
  12.782 -		goto err_out;
  12.783 -	}
  12.784 -	if (mmio_len < RTL_MIN_IO_SIZE) {
  12.785 -		printk (KERN_ERR PFX "%s: Invalid PCI mem region size(s), aborting\n", pdev->slot_name);
  12.786 -		rc = -ENODEV;
  12.787 -		goto err_out;
  12.788 -	}
  12.789 -#endif
  12.790 -
  12.791 -	rc = pci_request_regions (pdev, "8139too");
  12.792 -	if (rc)
  12.793 -		goto err_out;
  12.794 -
  12.795 -	/* enable PCI bus-mastering */
  12.796 -	pci_set_master (pdev);
  12.797 -
  12.798 -#ifdef USE_IO_OPS
  12.799 -	ioaddr = (void *) pio_start;
  12.800 -	dev->base_addr = pio_start;
  12.801 -	tp->mmio_addr = ioaddr;
  12.802 -#else
  12.803 -	/* ioremap MMIO region */
  12.804 -	ioaddr = ioremap (mmio_start, mmio_len);
  12.805 -	if (ioaddr == NULL) {
  12.806 -		printk (KERN_ERR PFX "%s: cannot remap MMIO, aborting\n", pdev->slot_name);
  12.807 -		rc = -EIO;
  12.808 -		goto err_out;
  12.809 -	}
  12.810 -	dev->base_addr = (long) ioaddr;
  12.811 -	tp->mmio_addr = ioaddr;
  12.812 -#endif /* USE_IO_OPS */
  12.813 -
  12.814 -	/* Bring old chips out of low-power mode. */
  12.815 -	RTL_W8 (HltClk, 'R');
  12.816 -
  12.817 -	/* check for missing/broken hardware */
  12.818 -	if (RTL_R32 (TxConfig) == 0xFFFFFFFF) {
  12.819 -		printk (KERN_ERR PFX "%s: Chip not responding, ignoring board\n",
  12.820 -			pdev->slot_name);
  12.821 -		rc = -EIO;
  12.822 -		goto err_out;
  12.823 -	}
  12.824 -
  12.825 -	/* identify chip attached to board */
  12.826 -	tmp = RTL_R8 (ChipVersion);
  12.827 -	for (i = 0; i < ARRAY_SIZE (rtl_chip_info); i++)
  12.828 -		if (tmp == rtl_chip_info[i].version) {
  12.829 -			tp->chipset = i;
  12.830 -			goto match;
  12.831 -		}
  12.832 -
  12.833 -	/* if unknown chip, assume array element #0, original RTL-8139 in this case */
  12.834 -	printk (KERN_DEBUG PFX "%s: unknown chip version, assuming RTL-8139\n",
  12.835 -		pdev->slot_name);
  12.836 -	printk (KERN_DEBUG PFX "%s: TxConfig = 0x%lx\n", pdev->slot_name, RTL_R32 (TxConfig));
  12.837 -	tp->chipset = 0;
  12.838 -
  12.839 -match:
  12.840 -	DPRINTK ("chipset id (%d) == index %d, '%s'\n",
  12.841 -		tmp,
  12.842 -		tp->chipset,
  12.843 -		rtl_chip_info[tp->chipset].name);
  12.844 -
  12.845 -	if (tp->chipset >= CH_8139B) {
  12.846 -		u8 new_tmp8 = tmp8 = RTL_R8 (Config1);
  12.847 -		DPRINTK("PCI PM wakeup\n");
  12.848 -		if ((rtl_chip_info[tp->chipset].flags & HasLWake) &&
  12.849 -		    (tmp8 & LWAKE))
  12.850 -			new_tmp8 &= ~LWAKE;
  12.851 -		new_tmp8 |= Cfg1_PM_Enable;
  12.852 -		if (new_tmp8 != tmp8) {
  12.853 -			RTL_W8 (Cfg9346, Cfg9346_Unlock);
  12.854 -			RTL_W8 (Config1, tmp8);
  12.855 -			RTL_W8 (Cfg9346, Cfg9346_Lock);
  12.856 -		}
  12.857 -		if (rtl_chip_info[tp->chipset].flags & HasLWake) {
  12.858 -			tmp8 = RTL_R8 (Config4);
  12.859 -			if (tmp8 & LWPTN)
  12.860 -				RTL_W8 (Config4, tmp8 & ~LWPTN);
  12.861 -		}
  12.862 -	} else {
  12.863 -		DPRINTK("Old chip wakeup\n");
  12.864 -		tmp8 = RTL_R8 (Config1);
  12.865 -		tmp8 &= ~(SLEEP | PWRDN);
  12.866 -		RTL_W8 (Config1, tmp8);
  12.867 -	}
  12.868 -
  12.869 -	rtl8139_chip_reset (ioaddr);
  12.870 -
  12.871 -	*dev_out = dev;
  12.872 -	return 0;
  12.873 -
  12.874 -err_out:
  12.875 -	__rtl8139_cleanup_dev (dev);
  12.876 -	return rc;
  12.877 -}
  12.878 -
  12.879 -
  12.880 -static int __devinit rtl8139_init_one (struct pci_dev *pdev,
  12.881 -				       const struct pci_device_id *ent)
  12.882 -{
  12.883 -	struct net_device *dev = NULL;
  12.884 -	struct rtl8139_private *tp;
  12.885 -	int i, addr_len, option;
  12.886 -	void *ioaddr;
  12.887 -	static int board_idx = -1;
  12.888 -	u8 pci_rev;
  12.889 -
  12.890 -	assert (pdev != NULL);
  12.891 -	assert (ent != NULL);
  12.892 -
  12.893 -	board_idx++;
  12.894 -
  12.895 -	/* when we're built into the kernel, the driver version message
  12.896 -	 * is only printed if at least one 8139 board has been found
  12.897 -	 */
  12.898 -#ifndef MODULE
  12.899 -	{
  12.900 -		static int printed_version;
  12.901 -		if (!printed_version++)
  12.902 -			printk (KERN_INFO RTL8139_DRIVER_NAME "\n");
  12.903 -	}
  12.904 -#endif
  12.905 -
  12.906 -	pci_read_config_byte(pdev, PCI_REVISION_ID, &pci_rev);
  12.907 -
  12.908 -	if (pdev->vendor == PCI_VENDOR_ID_REALTEK &&
  12.909 -	    pdev->device == PCI_DEVICE_ID_REALTEK_8139 && pci_rev >= 0x20) {
  12.910 -		printk(KERN_INFO PFX "pci dev %s (id %04x:%04x rev %02x) is an enhanced 8139C+ chip\n",
  12.911 -		       pdev->slot_name, pdev->vendor, pdev->device, pci_rev);
  12.912 -		printk(KERN_INFO PFX "Ensure the \"8139cp\" driver is installed!\n");
  12.913 -                return -ENODEV; /* force use of better driver */
  12.914 -	}
  12.915 -
  12.916 -	i = rtl8139_init_board (pdev, &dev);
  12.917 -	if (i < 0)
  12.918 -		return i;
  12.919 -
  12.920 -	tp = dev->priv;
  12.921 -	ioaddr = tp->mmio_addr;
  12.922 -
  12.923 -	assert (ioaddr != NULL);
  12.924 -	assert (dev != NULL);
  12.925 -	assert (tp != NULL);
  12.926 -
  12.927 -	addr_len = read_eeprom (ioaddr, 0, 8) == 0x8129 ? 8 : 6;
  12.928 -	for (i = 0; i < 3; i++)
  12.929 -		((u16 *) (dev->dev_addr))[i] =
  12.930 -		    le16_to_cpu (read_eeprom (ioaddr, i + 7, addr_len));
  12.931 -
  12.932 -	/* The Rtl8139-specific entries in the device structure. */
  12.933 -	dev->open = rtl8139_open;
  12.934 -	dev->hard_start_xmit = rtl8139_start_xmit;
  12.935 -	dev->stop = rtl8139_close;
  12.936 -	dev->get_stats = rtl8139_get_stats;
  12.937 -	dev->set_multicast_list = rtl8139_set_rx_mode;
  12.938 -	dev->do_ioctl = netdev_ioctl;
  12.939 -	dev->tx_timeout = rtl8139_tx_timeout;
  12.940 -	dev->watchdog_timeo = TX_TIMEOUT;
  12.941 -	dev->features |= NETIF_F_SG;
  12.942 -
  12.943 -	dev->irq = pdev->irq;
  12.944 -
  12.945 -	/* dev->priv/tp zeroed and aligned in init_etherdev */
  12.946 -	tp = dev->priv;
  12.947 -
  12.948 -	/* note: tp->chipset set in rtl8139_init_board */
  12.949 -	tp->drv_flags = board_info[ent->driver_data].hw_flags;
  12.950 -	tp->mmio_addr = ioaddr;
  12.951 -	spin_lock_init (&tp->lock);
  12.952 -
  12.953 -	/* dev is fully set up and ready to use now */
  12.954 -	DPRINTK("about to register device named %s (%p)...\n", dev->name, dev);
  12.955 -	i = register_netdev (dev);
  12.956 -	if (i) goto err_out;
  12.957 -
  12.958 -	pci_set_drvdata (pdev, dev);
  12.959 -
  12.960 -	printk (KERN_INFO "%s: %s at 0x%lx, "
  12.961 -		"%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, "
  12.962 -		"IRQ %d\n",
  12.963 -		dev->name,
  12.964 -		board_info[ent->driver_data].name,
  12.965 -		dev->base_addr,
  12.966 -		dev->dev_addr[0], dev->dev_addr[1],
  12.967 -		dev->dev_addr[2], dev->dev_addr[3],
  12.968 -		dev->dev_addr[4], dev->dev_addr[5],
  12.969 -		dev->irq);
  12.970 -
  12.971 -	printk (KERN_DEBUG "%s:  Identified 8139 chip type '%s'\n",
  12.972 -		dev->name, rtl_chip_info[tp->chipset].name);
  12.973 -
  12.974 -	/* Find the connected MII xcvrs.
  12.975 -	   Doing this in open() would allow detecting external xcvrs later, but
  12.976 -	   takes too much time. */
  12.977 -#ifdef CONFIG_8139TOO_8129
  12.978 -	if (tp->drv_flags & HAS_MII_XCVR) {
  12.979 -		int phy, phy_idx = 0;
  12.980 -		for (phy = 0; phy < 32 && phy_idx < sizeof(tp->phys); phy++) {
  12.981 -			int mii_status = mdio_read(dev, phy, 1);
  12.982 -			if (mii_status != 0xffff  &&  mii_status != 0x0000) {
  12.983 -				u16 advertising = mdio_read(dev, phy, 4);
  12.984 -				tp->phys[phy_idx++] = phy;
  12.985 -				printk(KERN_INFO "%s: MII transceiver %d status 0x%4.4x "
  12.986 -					   "advertising %4.4x.\n",
  12.987 -					   dev->name, phy, mii_status, advertising);
  12.988 -			}
  12.989 -		}
  12.990 -		if (phy_idx == 0) {
  12.991 -			printk(KERN_INFO "%s: No MII transceivers found!  Assuming SYM "
  12.992 -				   "transceiver.\n",
  12.993 -				   dev->name);
  12.994 -			tp->phys[0] = 32;
  12.995 -		}
  12.996 -	} else
  12.997 -#endif
  12.998 -		tp->phys[0] = 32;
  12.999 -
 12.1000 -	/* The lower four bits are the media type. */
 12.1001 -	option = (board_idx >= MAX_UNITS) ? 0 : media[board_idx];
 12.1002 -	if (option > 0) {
 12.1003 -		tp->full_duplex = (option & 0x210) ? 1 : 0;
 12.1004 -		tp->default_port = option & 0xFF;
 12.1005 -		if (tp->default_port)
 12.1006 -			tp->medialock = 1;
 12.1007 -	}
 12.1008 -	if (board_idx < MAX_UNITS  &&  full_duplex[board_idx] > 0)
 12.1009 -		tp->full_duplex = full_duplex[board_idx];
 12.1010 -	if (tp->full_duplex) {
 12.1011 -		printk(KERN_INFO "%s: Media type forced to Full Duplex.\n", dev->name);
 12.1012 -		/* Changing the MII-advertised media because might prevent
 12.1013 -		   re-connection. */
 12.1014 -		tp->duplex_lock = 1;
 12.1015 -	}
 12.1016 -	if (tp->default_port) {
 12.1017 -		printk(KERN_INFO "  Forcing %dMbps %s-duplex operation.\n",
 12.1018 -			   (option & 0x20 ? 100 : 10),
 12.1019 -			   (option & 0x10 ? "full" : "half"));
 12.1020 -		mdio_write(dev, tp->phys[0], 0,
 12.1021 -				   ((option & 0x20) ? 0x2000 : 0) | 	/* 100Mbps? */
 12.1022 -				   ((option & 0x10) ? 0x0100 : 0)); /* Full duplex? */
 12.1023 -	}
 12.1024 -
 12.1025 -	/* Put the chip into low-power mode. */
 12.1026 -	if (rtl_chip_info[tp->chipset].flags & HasHltClk)
 12.1027 -		RTL_W8 (HltClk, 'H');	/* 'R' would leave the clock running. */
 12.1028 -
 12.1029 -	return 0;
 12.1030 -
 12.1031 -err_out:
 12.1032 -	__rtl8139_cleanup_dev (dev);
 12.1033 -	return i;
 12.1034 -}
 12.1035 -
 12.1036 -
 12.1037 -static void __devexit rtl8139_remove_one (struct pci_dev *pdev)
 12.1038 -{
 12.1039 -	struct net_device *dev = pci_get_drvdata (pdev);
 12.1040 -	struct rtl8139_private *np;
 12.1041 -
 12.1042 -	assert (dev != NULL);
 12.1043 -	np = dev->priv;
 12.1044 -	assert (np != NULL);
 12.1045 -
 12.1046 -	unregister_netdev (dev);
 12.1047 -
 12.1048 -	__rtl8139_cleanup_dev (dev);
 12.1049 -}
 12.1050 -
 12.1051 -
 12.1052 -/* Serial EEPROM section. */
 12.1053 -
 12.1054 -/*  EEPROM_Ctrl bits. */
 12.1055 -#define EE_SHIFT_CLK	0x04	/* EEPROM shift clock. */
 12.1056 -#define EE_CS			0x08	/* EEPROM chip select. */
 12.1057 -#define EE_DATA_WRITE	0x02	/* EEPROM chip data in. */
 12.1058 -#define EE_WRITE_0		0x00
 12.1059 -#define EE_WRITE_1		0x02
 12.1060 -#define EE_DATA_READ	0x01	/* EEPROM chip data out. */
 12.1061 -#define EE_ENB			(0x80 | EE_CS)
 12.1062 -
 12.1063 -/* Delay between EEPROM clock transitions.
 12.1064 -   No extra delay is needed with 33Mhz PCI, but 66Mhz may change this.
 12.1065 - */
 12.1066 -
 12.1067 -#define eeprom_delay()	readl(ee_addr)
 12.1068 -
 12.1069 -/* The EEPROM commands include the alway-set leading bit. */
 12.1070 -#define EE_WRITE_CMD	(5)
 12.1071 -#define EE_READ_CMD		(6)
 12.1072 -#define EE_ERASE_CMD	(7)
 12.1073 -
 12.1074 -static int __devinit read_eeprom (void *ioaddr, int location, int addr_len)
 12.1075 -{
 12.1076 -	int i;
 12.1077 -	unsigned retval = 0;
 12.1078 -	void *ee_addr = ioaddr + Cfg9346;
 12.1079 -	int read_cmd = location | (EE_READ_CMD << addr_len);
 12.1080 -
 12.1081 -	writeb (EE_ENB & ~EE_CS, ee_addr);
 12.1082 -	writeb (EE_ENB, ee_addr);
 12.1083 -	eeprom_delay ();
 12.1084 -
 12.1085 -	/* Shift the read command bits out. */
 12.1086 -	for (i = 4 + addr_len; i >= 0; i--) {
 12.1087 -		int dataval = (read_cmd & (1 << i)) ? EE_DATA_WRITE : 0;
 12.1088 -		writeb (EE_ENB | dataval, ee_addr);
 12.1089 -		eeprom_delay ();
 12.1090 -		writeb (EE_ENB | dataval | EE_SHIFT_CLK, ee_addr);
 12.1091 -		eeprom_delay ();
 12.1092 -	}
 12.1093 -	writeb (EE_ENB, ee_addr);
 12.1094 -	eeprom_delay ();
 12.1095 -
 12.1096 -	for (i = 16; i > 0; i--) {
 12.1097 -		writeb (EE_ENB | EE_SHIFT_CLK, ee_addr);
 12.1098 -		eeprom_delay ();
 12.1099 -		retval =
 12.1100 -		    (retval << 1) | ((readb (ee_addr) & EE_DATA_READ) ? 1 :
 12.1101 -				     0);
 12.1102 -		writeb (EE_ENB, ee_addr);
 12.1103 -		eeprom_delay ();
 12.1104 -	}
 12.1105 -
 12.1106 -	/* Terminate the EEPROM access. */
 12.1107 -	writeb (~EE_CS, ee_addr);
 12.1108 -	eeprom_delay ();
 12.1109 -
 12.1110 -	return retval;
 12.1111 -}
 12.1112 -
 12.1113 -/* MII serial management: mostly bogus for now. */
 12.1114 -/* Read and write the MII management registers using software-generated
 12.1115 -   serial MDIO protocol.
 12.1116 -   The maximum data clock rate is 2.5 Mhz.  The minimum timing is usually
 12.1117 -   met by back-to-back PCI I/O cycles, but we insert a delay to avoid
 12.1118 -   "overclocking" issues. */
 12.1119 -#define MDIO_DIR		0x80
 12.1120 -#define MDIO_DATA_OUT	0x04
 12.1121 -#define MDIO_DATA_IN	0x02
 12.1122 -#define MDIO_CLK		0x01
 12.1123 -#define MDIO_WRITE0 (MDIO_DIR)
 12.1124 -#define MDIO_WRITE1 (MDIO_DIR | MDIO_DATA_OUT)
 12.1125 -
 12.1126 -#define mdio_delay(mdio_addr)	readb(mdio_addr)
 12.1127 -
 12.1128 -
 12.1129 -static char mii_2_8139_map[8] = {
 12.1130 -	BasicModeCtrl,
 12.1131 -	BasicModeStatus,
 12.1132 -	0,
 12.1133 -	0,
 12.1134 -	NWayAdvert,
 12.1135 -	NWayLPAR,
 12.1136 -	NWayExpansion,
 12.1137 -	0
 12.1138 -};
 12.1139 -
 12.1140 -
 12.1141 -#ifdef CONFIG_8139TOO_8129
 12.1142 -/* Syncronize the MII management interface by shifting 32 one bits out. */
 12.1143 -static void mdio_sync (void *mdio_addr)
 12.1144 -{
 12.1145 -	int i;
 12.1146 -
 12.1147 -	for (i = 32; i >= 0; i--) {
 12.1148 -		writeb (MDIO_WRITE1, mdio_addr);
 12.1149 -		mdio_delay (mdio_addr);
 12.1150 -		writeb (MDIO_WRITE1 | MDIO_CLK, mdio_addr);
 12.1151 -		mdio_delay (mdio_addr);
 12.1152 -	}
 12.1153 -}
 12.1154 -#endif
 12.1155 -
 12.1156 -static int mdio_read (struct net_device *dev, int phy_id, int location)
 12.1157 -{
 12.1158 -	struct rtl8139_private *tp = dev->priv;
 12.1159 -	int retval = 0;
 12.1160 -#ifdef CONFIG_8139TOO_8129
 12.1161 -	void *mdio_addr = tp->mmio_addr + Config4;
 12.1162 -	int mii_cmd = (0xf6 << 10) | (phy_id << 5) | location;
 12.1163 -	int i;
 12.1164 -#endif
 12.1165 -
 12.1166 -	if (phy_id > 31) {	/* Really a 8139.  Use internal registers. */
 12.1167 -		return location < 8 && mii_2_8139_map[location] ?
 12.1168 -		    readw (tp->mmio_addr + mii_2_8139_map[location]) : 0;
 12.1169 -	}
 12.1170 -
 12.1171 -#ifdef CONFIG_8139TOO_8129
 12.1172 -	mdio_sync (mdio_addr);
 12.1173 -	/* Shift the read command bits out. */
 12.1174 -	for (i = 15; i >= 0; i--) {
 12.1175 -		int dataval = (mii_cmd & (1 << i)) ? MDIO_DATA_OUT : 0;
 12.1176 -
 12.1177 -		writeb (MDIO_DIR | dataval, mdio_addr);
 12.1178 -		mdio_delay (mdio_addr);
 12.1179 -		writeb (MDIO_DIR | dataval | MDIO_CLK, mdio_addr);
 12.1180 -		mdio_delay (mdio_addr);
 12.1181 -	}
 12.1182 -
 12.1183 -	/* Read the two transition, 16 data, and wire-idle bits. */
 12.1184 -	for (i = 19; i > 0; i--) {
 12.1185 -		writeb (0, mdio_addr);
 12.1186 -		mdio_delay (mdio_addr);
 12.1187 -		retval = (retval << 1) | ((readb (mdio_addr) & MDIO_DATA_IN) ? 1 : 0);
 12.1188 -		writeb (MDIO_CLK, mdio_addr);
 12.1189 -		mdio_delay (mdio_addr);
 12.1190 -	}
 12.1191 -#endif
 12.1192 -
 12.1193 -	return (retval >> 1) & 0xffff;
 12.1194 -}
 12.1195 -
 12.1196 -
 12.1197 -static void mdio_write (struct net_device *dev, int phy_id, int location,
 12.1198 -			int value)
 12.1199 -{
 12.1200 -	struct rtl8139_private *tp = dev->priv;
 12.1201 -#ifdef CONFIG_8139TOO_8129
 12.1202 -	void *mdio_addr = tp->mmio_addr + Config4;
 12.1203 -	int mii_cmd = (0x5002 << 16) | (phy_id << 23) | (location << 18) | value;
 12.1204 -	int i;
 12.1205 -#endif
 12.1206 -
 12.1207 -	if (phy_id > 31) {	/* Really a 8139.  Use internal registers. */
 12.1208 -		void *ioaddr = tp->mmio_addr;
 12.1209 -		if (location == 0) {
 12.1210 -			RTL_W8 (Cfg9346, Cfg9346_Unlock);
 12.1211 -			RTL_W16 (BasicModeCtrl, value);
 12.1212 -			RTL_W8 (Cfg9346, Cfg9346_Lock);
 12.1213 -		} else if (location < 8 && mii_2_8139_map[location])
 12.1214 -			RTL_W16 (mii_2_8139_map[location], value);
 12.1215 -		return;
 12.1216 -	}
 12.1217 -
 12.1218 -#ifdef CONFIG_8139TOO_8129
 12.1219 -	mdio_sync (mdio_addr);
 12.1220 -
 12.1221 -	/* Shift the command bits out. */
 12.1222 -	for (i = 31; i >= 0; i--) {
 12.1223 -		int dataval =
 12.1224 -		    (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0;
 12.1225 -		writeb (dataval, mdio_addr);
 12.1226 -		mdio_delay (mdio_addr);
 12.1227 -		writeb (dataval | MDIO_CLK, mdio_addr);
 12.1228 -		mdio_delay (mdio_addr);
 12.1229 -	}
 12.1230 -	/* Clear out extra bits. */
 12.1231 -	for (i = 2; i > 0; i--) {
 12.1232 -		writeb (0, mdio_addr);
 12.1233 -		mdio_delay (mdio_addr);
 12.1234 -		writeb (MDIO_CLK, mdio_addr);
 12.1235 -		mdio_delay (mdio_addr);
 12.1236 -	}
 12.1237 -#endif
 12.1238 -}
 12.1239 -
 12.1240 -
 12.1241 -static int rtl8139_open (struct net_device *dev)
 12.1242 -{
 12.1243 -	struct rtl8139_private *tp = dev->priv;
 12.1244 -	int retval;
 12.1245 -#ifdef RTL8139_DEBUG
 12.1246 -	void *ioaddr = tp->mmio_addr;
 12.1247 -#endif
 12.1248 -
 12.1249 -	retval = request_irq (dev->irq, rtl8139_interrupt, SA_SHIRQ, dev->name, dev);
 12.1250 -	if (retval)
 12.1251 -		return retval;
 12.1252 -
 12.1253 -	tp->tx_bufs = pci_alloc_consistent(tp->pci_dev, TX_BUF_TOT_LEN,
 12.1254 -					   &tp->tx_bufs_dma);
 12.1255 -	tp->rx_ring = pci_alloc_consistent(tp->pci_dev, RX_BUF_TOT_LEN,
 12.1256 -					   &tp->rx_ring_dma);
 12.1257 -	if (tp->tx_bufs == NULL || tp->rx_ring == NULL) {
 12.1258 -		free_irq(dev->irq, dev);
 12.1259 -
 12.1260 -		if (tp->tx_bufs)
 12.1261 -			pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN,
 12.1262 -					    tp->tx_bufs, tp->tx_bufs_dma);
 12.1263 -		if (tp->rx_ring)
 12.1264 -			pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN,
 12.1265 -					    tp->rx_ring, tp->rx_ring_dma);
 12.1266 -
 12.1267 -		return -ENOMEM;
 12.1268 -
 12.1269 -	}
 12.1270 -
 12.1271 -	tp->full_duplex = tp->duplex_lock;
 12.1272 -	tp->tx_flag = (TX_FIFO_THRESH << 11) & 0x003f0000;
 12.1273 -	tp->twistie = 1;
 12.1274 -	tp->time_to_die = 0;
 12.1275 -
 12.1276 -	rtl8139_init_ring (dev);
 12.1277 -	rtl8139_hw_start (dev);
 12.1278 -
 12.1279 -	DPRINTK ("%s: rtl8139_open() ioaddr %#lx IRQ %d"
 12.1280 -			" GP Pins %2.2x %s-duplex.\n",
 12.1281 -			dev->name, pci_resource_start (tp->pci_dev, 1),
 12.1282 -			dev->irq, RTL_R8 (MediaStatus),
 12.1283 -			tp->full_duplex ? "full" : "half");
 12.1284 -
 12.1285 -	return 0;
 12.1286 -}
 12.1287 -
 12.1288 -
 12.1289 -static void rtl_check_media (struct net_device *dev)
 12.1290 -{
 12.1291 -	struct rtl8139_private *tp = dev->priv;
 12.1292 -
 12.1293 -	if (tp->phys[0] >= 0) {
 12.1294 -		u16 mii_reg5 = mdio_read(dev, tp->phys[0], 5);
 12.1295 -		if (mii_reg5 == 0xffff)
 12.1296 -			;					/* Not there */
 12.1297 -		else if ((mii_reg5 & 0x0100) == 0x0100
 12.1298 -				 || (mii_reg5 & 0x00C0) == 0x0040)
 12.1299 -			tp->full_duplex = 1;
 12.1300 -
 12.1301 -		printk (KERN_INFO"%s: Setting %s%s-duplex based on"
 12.1302 -				" auto-negotiated partner ability %4.4x.\n",
 12.1303 -		        dev->name, mii_reg5 == 0 ? "" :
 12.1304 -				(mii_reg5 & 0x0180) ? "100mbps " : "10mbps ",
 12.1305 -			tp->full_duplex ? "full" : "half", mii_reg5);
 12.1306 -	}
 12.1307 -}
 12.1308 -
 12.1309 -/* Start the hardware at open or resume. */
 12.1310 -static void rtl8139_hw_start (struct net_device *dev)
 12.1311 -{
 12.1312 -	struct rtl8139_private *tp = dev->priv;
 12.1313 -	void *ioaddr = tp->mmio_addr;
 12.1314 -	u32 i;
 12.1315 -	u8 tmp;
 12.1316 -
 12.1317 -	/* Bring old chips out of low-power mode. */
 12.1318 -	if (rtl_chip_info[tp->chipset].flags & HasHltClk)
 12.1319 -		RTL_W8 (HltClk, 'R');
 12.1320 -
 12.1321 -	rtl8139_chip_reset (ioaddr);
 12.1322 -
 12.1323 -	/* unlock Config[01234] and BMCR register writes */
 12.1324 -	RTL_W8_F (Cfg9346, Cfg9346_Unlock);
 12.1325 -	/* Restore our idea of the MAC address. */
 12.1326 -	RTL_W32_F (MAC0 + 0, cpu_to_le32 (*(u32 *) (dev->dev_addr + 0)));
 12.1327 -	RTL_W32_F (MAC0 + 4, cpu_to_le32 (*(u32 *) (dev->dev_addr + 4)));
 12.1328 -
 12.1329 -	/* Must enable Tx/Rx before setting transfer thresholds! */
 12.1330 -	RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb);
 12.1331 -
 12.1332 -	tp->rx_config = rtl8139_rx_config | AcceptBroadcast | AcceptMyPhys;
 12.1333 -	RTL_W32 (RxConfig, tp->rx_config);
 12.1334 -
 12.1335 -	/* Check this value: the documentation for IFG contradicts ifself. */
 12.1336 -	RTL_W32 (TxConfig, rtl8139_tx_config);
 12.1337 -
 12.1338 -	tp->cur_rx = 0;
 12.1339 -
 12.1340 -	rtl_check_media (dev);
 12.1341 -
 12.1342 -	if (tp->chipset >= CH_8139B) {
 12.1343 -		/* Disable magic packet scanning, which is enabled
 12.1344 -		 * when PM is enabled in Config1.  It can be reenabled
 12.1345 -		 * via ETHTOOL_SWOL if desired.  */
 12.1346 -		RTL_W8 (Config3, RTL_R8 (Config3) & ~Cfg3_Magic);
 12.1347 -	}
 12.1348 -
 12.1349 -	DPRINTK("init buffer addresses\n");
 12.1350 -
 12.1351 -	/* Lock Config[01234] and BMCR register writes */
 12.1352 -	RTL_W8 (Cfg9346, Cfg9346_Lock);
 12.1353 -
 12.1354 -	/* init Rx ring buffer DMA address */
 12.1355 -	RTL_W32_F (RxBuf, tp->rx_ring_dma);
 12.1356 -
 12.1357 -	/* init Tx buffer DMA addresses */
 12.1358 -	for (i = 0; i < NUM_TX_DESC; i++)
 12.1359 -		RTL_W32_F (TxAddr0 + (i * 4), tp->tx_bufs_dma + (tp->tx_buf[i] - tp->tx_bufs));
 12.1360 -
 12.1361 -	RTL_W32 (RxMissed, 0);
 12.1362 -
 12.1363 -	rtl8139_set_rx_mode (dev);
 12.1364 -
 12.1365 -	/* no early-rx interrupts */
 12.1366 -	RTL_W16 (MultiIntr, RTL_R16 (MultiIntr) & MultiIntrClear);
 12.1367 -
 12.1368 -	/* make sure RxTx has started */
 12.1369 -	tmp = RTL_R8 (ChipCmd);
 12.1370 -	if ((!(tmp & CmdRxEnb)) || (!(tmp & CmdTxEnb)))
 12.1371 -		RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb);
 12.1372 -
 12.1373 -	/* Enable all known interrupts by setting the interrupt mask. */
 12.1374 -	RTL_W16 (IntrMask, rtl8139_intr_mask);
 12.1375 -
 12.1376 -	netif_start_queue (dev);
 12.1377 -}
 12.1378 -
 12.1379 -
 12.1380 -/* Initialize the Rx and Tx rings, along with various 'dev' bits. */
 12.1381 -static void rtl8139_init_ring (struct net_device *dev)
 12.1382 -{
 12.1383 -	struct rtl8139_private *tp = dev->priv;
 12.1384 -	int i;
 12.1385 -
 12.1386 -	tp->cur_rx = 0;
 12.1387 -	tp->cur_tx = 0;
 12.1388 -	tp->dirty_tx = 0;
 12.1389 -
 12.1390 -	for (i = 0; i < NUM_TX_DESC; i++)
 12.1391 -		tp->tx_buf[i] = &tp->tx_bufs[i * TX_BUF_SIZE];
 12.1392 -}
 12.1393 -
 12.1394 -
 12.1395 -static void rtl8139_tx_clear (struct rtl8139_private *tp)
 12.1396 -{
 12.1397 -	tp->cur_tx = 0;
 12.1398 -	tp->dirty_tx = 0;
 12.1399 -
 12.1400 -	/* XXX account for unsent Tx packets in tp->stats.tx_dropped */
 12.1401 -}
 12.1402 -
 12.1403 -
 12.1404 -static void rtl8139_tx_timeout (struct net_device *dev)
 12.1405 -{
 12.1406 -	struct rtl8139_private *tp = dev->priv;
 12.1407 -	void *ioaddr = tp->mmio_addr;
 12.1408 -	int i;
 12.1409 -	u8 tmp8;
 12.1410 -	unsigned long flags;
 12.1411 -
 12.1412 -	DPRINTK ("%s: Transmit timeout, status %2.2x %4.4x "
 12.1413 -		 "media %2.2x.\n", dev->name,
 12.1414 -		 RTL_R8 (ChipCmd),
 12.1415 -		 RTL_R16 (IntrStatus),
 12.1416 -		 RTL_R8 (MediaStatus));
 12.1417 -
 12.1418 -	tp->xstats.tx_timeouts++;
 12.1419 -
 12.1420 -	/* disable Tx ASAP, if not already */
 12.1421 -	tmp8 = RTL_R8 (ChipCmd);
 12.1422 -	if (tmp8 & CmdTxEnb)
 12.1423 -		RTL_W8 (ChipCmd, CmdRxEnb);
 12.1424 -
 12.1425 -	/* Disable interrupts by clearing the interrupt mask. */
 12.1426 -	RTL_W16 (IntrMask, 0x0000);
 12.1427 -
 12.1428 -	/* Emit info to figure out what went wrong. */
 12.1429 -	printk (KERN_DEBUG "%s: Tx queue start entry %ld  dirty entry %ld.\n",
 12.1430 -		dev->name, tp->cur_tx, tp->dirty_tx);
 12.1431 -	for (i = 0; i < NUM_TX_DESC; i++)
 12.1432 -		printk (KERN_DEBUG "%s:  Tx descriptor %d is %8.8lx.%s\n",
 12.1433 -			dev->name, i, RTL_R32 (TxStatus0 + (i * 4)),
 12.1434 -			i == tp->dirty_tx % NUM_TX_DESC ?
 12.1435 -				" (queue head)" : "");
 12.1436 -
 12.1437 -	/* Stop a shared interrupt from scavenging while we are. */
 12.1438 -	spin_lock_irqsave (&tp->lock, flags);
 12.1439 -	rtl8139_tx_clear (tp);
 12.1440 -	spin_unlock_irqrestore (&tp->lock, flags);
 12.1441 -
 12.1442 -	/* ...and finally, reset everything */
 12.1443 -	rtl8139_hw_start (dev);
 12.1444 -
 12.1445 -	netif_wake_queue (dev);
 12.1446 -}
 12.1447 -
 12.1448 -
 12.1449 -static int rtl8139_start_xmit (struct sk_buff *skb, struct net_device *dev)
 12.1450 -{
 12.1451 -	struct rtl8139_private *tp = dev->priv;
 12.1452 -	void *ioaddr = tp->mmio_addr;
 12.1453 -	unsigned int entry;
 12.1454 -
 12.1455 -	/* Calculate the next Tx descriptor entry. */
 12.1456 -	entry = tp->cur_tx % NUM_TX_DESC;
 12.1457 -
 12.1458 -	if (likely(skb->len < TX_BUF_SIZE)) {
 12.1459 -		skb_copy_bits(skb, 0, tp->tx_buf[entry], skb->len);
 12.1460 -		dev_kfree_skb(skb);
 12.1461 -	} else {
 12.1462 -		dev_kfree_skb(skb);
 12.1463 -		tp->stats.tx_dropped++;
 12.1464 -		return 0;
 12.1465 -  	}
 12.1466 -
 12.1467 -	/* Note: the chip doesn't have auto-pad! */
 12.1468 -	spin_lock_irq(&tp->lock);
 12.1469 -	RTL_W32_F (TxStatus0 + (entry * sizeof (u32)),
 12.1470 -		   tp->tx_flag | (skb->len >= ETH_ZLEN ? skb->len : ETH_ZLEN));
 12.1471 -
 12.1472 -	dev->trans_start = jiffies;
 12.1473 -
 12.1474 -	tp->cur_tx++;
 12.1475 -	wmb();
 12.1476 -
 12.1477 -	if ((tp->cur_tx - NUM_TX_DESC) == tp->dirty_tx)
 12.1478 -		netif_stop_queue (dev);
 12.1479 -	spin_unlock_irq(&tp->lock);
 12.1480 -
 12.1481 -	DPRINTK ("%s: Queued Tx packet at %p size %u to slot %d.\n",
 12.1482 -		 dev->name, skb->data, skb->len, entry);
 12.1483 -
 12.1484 -	return 0;
 12.1485 -}
 12.1486 -
 12.1487 -
 12.1488 -static void rtl8139_tx_interrupt (struct net_device *dev,
 12.1489 -				  struct rtl8139_private *tp,
 12.1490 -				  void *ioaddr)
 12.1491 -{
 12.1492 -	unsigned long dirty_tx, tx_left;
 12.1493 -
 12.1494 -	assert (dev != NULL);
 12.1495 -	assert (tp != NULL);
 12.1496 -	assert (ioaddr != NULL);
 12.1497 -
 12.1498 -	dirty_tx = tp->dirty_tx;
 12.1499 -	tx_left = tp->cur_tx - dirty_tx;
 12.1500 -	while (tx_left > 0) {
 12.1501 -		int entry = dirty_tx % NUM_TX_DESC;
 12.1502 -		int txstatus;
 12.1503 -
 12.1504 -		txstatus = RTL_R32 (TxStatus0 + (entry * sizeof (u32)));
 12.1505 -
 12.1506 -		if (!(txstatus & (TxStatOK | TxUnderrun | TxAborted)))
 12.1507 -			break;	/* It still hasn't been Txed */
 12.1508 -
 12.1509 -		/* Note: TxCarrierLost is always asserted at 100mbps. */
 12.1510 -		if (txstatus & (TxOutOfWindow | TxAborted)) {
 12.1511 -			/* There was an major error, log it. */
 12.1512 -			DPRINTK ("%s: Transmit error, Tx status %8.8x.\n",
 12.1513 -				 dev->name, txstatus);
 12.1514 -			tp->stats.tx_errors++;
 12.1515 -			if (txstatus & TxAborted) {
 12.1516 -				tp->stats.tx_aborted_errors++;
 12.1517 -				RTL_W32 (TxConfig, TxClearAbt);
 12.1518 -				RTL_W16 (IntrStatus, TxErr);
 12.1519 -				wmb();
 12.1520 -			}
 12.1521 -			if (txstatus & TxCarrierLost)
 12.1522 -				tp->stats.tx_carrier_errors++;
 12.1523 -			if (txstatus & TxOutOfWindow)
 12.1524 -				tp->stats.tx_window_errors++;
 12.1525 -#ifdef ETHER_STATS
 12.1526 -			if ((txstatus & 0x0f000000) == 0x0f000000)
 12.1527 -				tp->stats.collisions16++;
 12.1528 -#endif
 12.1529 -		} else {
 12.1530 -			if (txstatus & TxUnderrun) {
 12.1531 -				/* Add 64 to the Tx FIFO threshold. */
 12.1532 -				if (tp->tx_flag < 0x00300000)
 12.1533 -					tp->tx_flag += 0x00020000;
 12.1534 -				tp->stats.tx_fifo_errors++;
 12.1535 -			}
 12.1536 -			tp->stats.collisions += (txstatus >> 24) & 15;
 12.1537 -			tp->stats.tx_bytes += txstatus & 0x7ff;
 12.1538 -			tp->stats.tx_packets++;
 12.1539 -		}
 12.1540 -
 12.1541 -		dirty_tx++;
 12.1542 -		tx_left--;
 12.1543 -	}
 12.1544 -
 12.1545 -#ifndef RTL8139_NDEBUG
 12.1546 -	if (tp->cur_tx - dirty_tx > NUM_TX_DESC) {
 12.1547 -		printk (KERN_ERR "%s: Out-of-sync dirty pointer, %ld vs. %ld.\n",
 12.1548 -		        dev->name, dirty_tx, tp->cur_tx);
 12.1549 -		dirty_tx += NUM_TX_DESC;
 12.1550 -	}
 12.1551 -#endif /* RTL8139_NDEBUG */
 12.1552 -
 12.1553 -	/* only wake the queue if we did work, and the queue is stopped */
 12.1554 -	if (tp->dirty_tx != dirty_tx) {
 12.1555 -		tp->dirty_tx = dirty_tx;
 12.1556 -		mb();
 12.1557 -		if (netif_queue_stopped (dev))
 12.1558 -			netif_wake_queue (dev);
 12.1559 -	}
 12.1560 -}
 12.1561 -
 12.1562 -
 12.1563 -/* TODO: clean this up!  Rx reset need not be this intensive */
 12.1564 -static void rtl8139_rx_err (u32 rx_status, struct net_device *dev,
 12.1565 -			    struct rtl8139_private *tp, void *ioaddr)
 12.1566 -{
 12.1567 -	u8 tmp8;
 12.1568 -	int tmp_work;
 12.1569 -
 12.1570 -	DPRINTK ("%s: Ethernet frame had errors, status %8.8x.\n",
 12.1571 -	         dev->name, rx_status);
 12.1572 -	if (rx_status & RxTooLong) {
 12.1573 -		DPRINTK ("%s: Oversized Ethernet frame, status %4.4x!\n",
 12.1574 -			 dev->name, rx_status);
 12.1575 -		/* A.C.: The chip hangs here. */
 12.1576 -	}
 12.1577 -	tp->stats.rx_errors++;
 12.1578 -	if (rx_status & (RxBadSymbol | RxBadAlign))
 12.1579 -		tp->stats.rx_frame_errors++;
 12.1580 -	if (rx_status & (RxRunt | RxTooLong))
 12.1581 -		tp->stats.rx_length_errors++;
 12.1582 -	if (rx_status & RxCRCErr)
 12.1583 -		tp->stats.rx_crc_errors++;
 12.1584 -
 12.1585 -	/* Reset the receiver, based on RealTek recommendation. (Bug?) */
 12.1586 -
 12.1587 -	/* disable receive */
 12.1588 -	RTL_W8_F (ChipCmd, CmdTxEnb);
 12.1589 -	tmp_work = 200;
 12.1590 -	while (--tmp_work > 0) {
 12.1591 -		udelay(1);
 12.1592 -		tmp8 = RTL_R8 (ChipCmd);
 12.1593 -		if (!(tmp8 & CmdRxEnb))
 12.1594 -			break;
 12.1595 -	}
 12.1596 -	if (tmp_work <= 0)
 12.1597 -		printk (KERN_WARNING PFX "rx stop wait too long\n");
 12.1598 -	/* restart receive */
 12.1599 -	tmp_work = 200;
 12.1600 -	while (--tmp_work > 0) {
 12.1601 -		RTL_W8_F (ChipCmd, CmdRxEnb | CmdTxEnb);
 12.1602 -		udelay(1);
 12.1603 -		tmp8 = RTL_R8 (ChipCmd);
 12.1604 -		if ((tmp8 & CmdRxEnb) && (tmp8 & CmdTxEnb))
 12.1605 -			break;
 12.1606 -	}
 12.1607 -	if (tmp_work <= 0)
 12.1608 -		printk (KERN_WARNING PFX "tx/rx enable wait too long\n");
 12.1609 -
 12.1610 -	/* and reinitialize all rx related registers */
 12.1611 -	RTL_W8_F (Cfg9346, Cfg9346_Unlock);
 12.1612 -	/* Must enable Tx/Rx before setting transfer thresholds! */
 12.1613 -	RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb);
 12.1614 -
 12.1615 -	tp->rx_config = rtl8139_rx_config | AcceptBroadcast | AcceptMyPhys;
 12.1616 -	RTL_W32 (RxConfig, tp->rx_config);
 12.1617 -	tp->cur_rx = 0;
 12.1618 -
 12.1619 -	DPRINTK("init buffer addresses\n");
 12.1620 -
 12.1621 -	/* Lock Config[01234] and BMCR register writes */
 12.1622 -	RTL_W8 (Cfg9346, Cfg9346_Lock);
 12.1623 -
 12.1624 -	/* init Rx ring buffer DMA address */
 12.1625 -	RTL_W32_F (RxBuf, tp->rx_ring_dma);
 12.1626 -
 12.1627 -	/* A.C.: Reset the multicast list. */
 12.1628 -	__set_rx_mode (dev);
 12.1629 -}
 12.1630 -
 12.1631 -static void rtl8139_rx_interrupt (struct net_device *dev,
 12.1632 -				  struct rtl8139_private *tp, void *ioaddr)
 12.1633 -{
 12.1634 -	unsigned char *rx_ring;
 12.1635 -	u16 cur_rx;
 12.1636 -
 12.1637 -	assert (dev != NULL);
 12.1638 -	assert (tp != NULL);
 12.1639 -	assert (ioaddr != NULL);
 12.1640 -
 12.1641 -	rx_ring = tp->rx_ring;
 12.1642 -	cur_rx = tp->cur_rx;
 12.1643 -
 12.1644 -	DPRINTK ("%s: In rtl8139_rx(), current %4.4x BufAddr %4.4x,"
 12.1645 -		 " free to %4.4x, Cmd %2.2x.\n", dev->name, cur_rx,
 12.1646 -		 RTL_R16 (RxBufAddr),
 12.1647 -		 RTL_R16 (RxBufPtr), RTL_R8 (ChipCmd));
 12.1648 -
 12.1649 -	while ((RTL_R8 (ChipCmd) & RxBufEmpty) == 0) {
 12.1650 -		int ring_offset = cur_rx % RX_BUF_LEN;
 12.1651 -		u32 rx_status;
 12.1652 -		unsigned int rx_size;
 12.1653 -		unsigned int pkt_size;
 12.1654 -		struct sk_buff *skb;
 12.1655 -
 12.1656 -		rmb();
 12.1657 -
 12.1658 -		/* read size+status of next frame from DMA ring buffer */
 12.1659 -		rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset));
 12.1660 -		rx_size = rx_status >> 16;
 12.1661 -		pkt_size = rx_size - 4;
 12.1662 -
 12.1663 -		DPRINTK ("%s:  rtl8139_rx() status %4.4x, size %4.4x,"
 12.1664 -			 " cur %4.4x.\n", dev->name, rx_status,
 12.1665 -			 rx_size, cur_rx);
 12.1666 -#if RTL8139_DEBUG > 2
 12.1667 -		{
 12.1668 -			int i;
 12.1669 -			DPRINTK ("%s: Frame contents ", dev->name);
 12.1670 -			for (i = 0; i < 70; i++)
 12.1671 -				printk (" %2.2x",
 12.1672 -					rx_ring[ring_offset + i]);
 12.1673 -			printk (".\n");
 12.1674 -		}
 12.1675 -#endif
 12.1676 -
 12.1677 -		/* Packet copy from FIFO still in progress.
 12.1678 -		 * Theoretically, this should never happen
 12.1679 -		 * since EarlyRx is disabled.
 12.1680 -		 */
 12.1681 -		if (rx_size == 0xfff0) {
 12.1682 -			tp->xstats.early_rx++;
 12.1683 -			break;
 12.1684 -		}
 12.1685 -
 12.1686 -		/* If Rx err or invalid rx_size/rx_status received
 12.1687 -		 * (which happens if we get lost in the ring),
 12.1688 -		 * Rx process gets reset, so we abort any further
 12.1689 -		 * Rx processing.
 12.1690 -		 */
 12.1691 -		if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) ||
 12.1692 -		    (rx_size < 8) ||
 12.1693 -		    (!(rx_status & RxStatusOK))) {
 12.1694 -			rtl8139_rx_err (rx_status, dev, tp, ioaddr);
 12.1695 -			return;
 12.1696 -		}
 12.1697 -
 12.1698 -		/* Malloc up new buffer, compatible with net-2e. */
 12.1699 -		/* Omit the four octet CRC from the length. */
 12.1700 -
 12.1701 -		/* TODO: consider allocating skb's outside of
 12.1702 -		 * interrupt context, both to speed interrupt processing,
 12.1703 -		 * and also to reduce the chances of having to
 12.1704 -		 * drop packets here under memory pressure.
 12.1705 -		 */
 12.1706 -
 12.1707 -		skb = dev_alloc_skb (pkt_size + 2);
 12.1708 -		if (skb) {
 12.1709 -			skb->dev = dev;
 12.1710 -			skb_reserve (skb, 2);	/* 16 byte align the IP fields. */
 12.1711 -
 12.1712 -			eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
 12.1713 -			skb_put (skb, pkt_size);
 12.1714 -
 12.1715 -			skb->protocol = eth_type_trans (skb, dev);
 12.1716 -			netif_rx (skb);
 12.1717 -			dev->last_rx = jiffies;
 12.1718 -			tp->stats.rx_bytes += pkt_size;
 12.1719 -			tp->stats.rx_packets++;
 12.1720 -		} else {
 12.1721 -			printk (KERN_WARNING
 12.1722 -				"%s: Memory squeeze, dropping packet.\n",
 12.1723 -				dev->name);
 12.1724 -			tp->stats.rx_dropped++;
 12.1725 -		}
 12.1726 -
 12.1727 -		cur_rx = (cur_rx + rx_size + 4 + 3) & ~3;
 12.1728 -		RTL_W16 (RxBufPtr, cur_rx - 16);
 12.1729 -
 12.1730 -		if (RTL_R16 (IntrStatus) & RxAckBits)
 12.1731 -			RTL_W16_F (IntrStatus, RxAckBits);
 12.1732 -	}
 12.1733 -
 12.1734 -	DPRINTK ("%s: Done rtl8139_rx(), current %4.4x BufAddr %4.4x,"
 12.1735 -		 " free to %4.4x, Cmd %2.2x.\n", dev->name, cur_rx,
 12.1736 -		 RTL_R16 (RxBufAddr),
 12.1737 -		 RTL_R16 (RxBufPtr), RTL_R8 (ChipCmd));
 12.1738 -
 12.1739 -	tp->cur_rx = cur_rx;
 12.1740 -}
 12.1741 -
 12.1742 -
 12.1743 -static void rtl8139_weird_interrupt (struct net_device *dev,
 12.1744 -				     struct rtl8139_private *tp,
 12.1745 -				     void *ioaddr,
 12.1746 -				     int status, int link_changed)
 12.1747 -{
 12.1748 -	DPRINTK ("%s: Abnormal interrupt, status %8.8x.\n",
 12.1749 -		 dev->name, status);
 12.1750 -
 12.1751 -	assert (dev != NULL);
 12.1752 -	assert (tp != NULL);
 12.1753 -	assert (ioaddr != NULL);
 12.1754 -
 12.1755 -	/* Update the error count. */
 12.1756 -	tp->stats.rx_missed_errors += RTL_R32 (RxMissed);
 12.1757 -	RTL_W32 (RxMissed, 0);
 12.1758 -
 12.1759 -	if ((status & RxUnderrun) && link_changed &&
 12.1760 -	    (tp->drv_flags & HAS_LNK_CHNG)) {
 12.1761 -		/* Really link-change on new chips. */
 12.1762 -		int lpar = RTL_R16 (NWayLPAR);
 12.1763 -		int duplex = (lpar & 0x0100) || (lpar & 0x01C0) == 0x0040
 12.1764 -				|| tp->duplex_lock;
 12.1765 -		if (tp->full_duplex != duplex) {
 12.1766 -			tp->full_duplex = duplex;
 12.1767 -#if 0
 12.1768 -			RTL_W8 (Cfg9346, Cfg9346_Unlock);
 12.1769 -			RTL_W8 (Config1, tp->full_duplex ? 0x60 : 0x20);
 12.1770 -			RTL_W8 (Cfg9346, Cfg9346_Lock);
 12.1771 -#endif
 12.1772 -		}
 12.1773 -		status &= ~RxUnderrun;
 12.1774 -	}
 12.1775 -
 12.1776 -	/* XXX along with rtl8139_rx_err, are we double-counting errors? */
 12.1777 -	if (status &
 12.1778 -	    (RxUnderrun | RxOverflow | RxErr | RxFIFOOver))
 12.1779 -		tp->stats.rx_errors++;
 12.1780 -
 12.1781 -	if (status & PCSTimeout)
 12.1782 -		tp->stats.rx_length_errors++;
 12.1783 -	if (status & (RxUnderrun | RxFIFOOver))
 12.1784 -		tp->stats.rx_fifo_errors++;
 12.1785 -	if (status & PCIErr) {
 12.1786 -		u16 pci_cmd_status;
 12.1787 -		pci_read_config_word (tp->pci_dev, PCI_STATUS, &pci_cmd_status);
 12.1788 -		pci_write_config_word (tp->pci_dev, PCI_STATUS, pci_cmd_status);
 12.1789 -
 12.1790 -		printk (KERN_ERR "%s: PCI Bus error %4.4x.\n",
 12.1791 -			dev->name, pci_cmd_status);
 12.1792 -	}
 12.1793 -}
 12.1794 -
 12.1795 -
 12.1796 -/* The interrupt handler does all of the Rx thread work and cleans up
 12.1797 -   after the Tx thread. */
 12.1798 -static void rtl8139_interrupt (int irq, void *dev_instance,
 12.1799 -			       struct pt_regs *regs)
 12.1800 -{
 12.1801 -	struct net_device *dev = (struct net_device *) dev_instance;
 12.1802 -	struct rtl8139_private *tp = dev->priv;
 12.1803 -	int boguscnt = max_interrupt_work;
 12.1804 -	void *ioaddr = tp->mmio_addr;
 12.1805 -	int ackstat, status;
 12.1806 -	int link_changed = 0; /* avoid bogus "uninit" warning */
 12.1807 -
 12.1808 -	spin_lock (&tp->lock);
 12.1809 -
 12.1810 -	do {
 12.1811 -		status = RTL_R16 (IntrStatus);
 12.1812 -
 12.1813 -		/* h/w no longer present (hotplug?) or major error, bail */
 12.1814 -		if (status == 0xFFFF)
 12.1815 -			break;
 12.1816 -
 12.1817 -		if ((status &
 12.1818 -		     (PCIErr | PCSTimeout | RxUnderrun | RxOverflow |
 12.1819 -		      RxFIFOOver | TxErr | TxOK | RxErr | RxOK)) == 0)
 12.1820 -			break;
 12.1821 -
 12.1822 -		/* Acknowledge all of the current interrupt sources ASAP, but
 12.1823 -		   an first get an additional status bit from CSCR. */
 12.1824 -		if (status & RxUnderrun)
 12.1825 -			link_changed = RTL_R16 (CSCR) & CSCR_LinkChangeBit;
 12.1826 -
 12.1827 -		/* The chip takes special action when we clear RxAckBits,
 12.1828 -		 * so we clear them later in rtl8139_rx_interrupt
 12.1829 -		 */
 12.1830 -		ackstat = status & ~(RxAckBits | TxErr);
 12.1831 -		RTL_W16 (IntrStatus, ackstat);
 12.1832 -
 12.1833 -		DPRINTK ("%s: interrupt  status=%#4.4x ackstat=%#4.4x new intstat=%#4.4x.\n",
 12.1834 -			 dev->name, ackstat, status, RTL_R16 (IntrStatus));
 12.1835 -
 12.1836 -		if (netif_running (dev) && (status & RxAckBits))
 12.1837 -			rtl8139_rx_interrupt (dev, tp, ioaddr);
 12.1838 -
 12.1839 -		/* Check uncommon events with one test. */
 12.1840 -		if (status & (PCIErr | PCSTimeout | RxUnderrun | RxOverflow |
 12.1841 -		  	      RxFIFOOver | RxErr))
 12.1842 -			rtl8139_weird_interrupt (dev, tp, ioaddr,
 12.1843 -						 status, link_changed);
 12.1844 -
 12.1845 -		if (netif_running (dev) && (status & (TxOK | TxErr))) {
 12.1846 -			rtl8139_tx_interrupt (dev, tp, ioaddr);
 12.1847 -			if (status & TxErr)
 12.1848 -				RTL_W16 (IntrStatus, TxErr);
 12.1849 -		}
 12.1850 -
 12.1851 -		boguscnt--;
 12.1852 -	} while (boguscnt > 0);
 12.1853 -
 12.1854 -	if (boguscnt <= 0) {
 12.1855 -		printk (KERN_WARNING "%s: Too much work at interrupt, "
 12.1856 -			"IntrStatus=0x%4.4x.\n", dev->name, status);
 12.1857 -
 12.1858 -		/* Clear all interrupt sources. */
 12.1859 -		RTL_W16 (IntrStatus, 0xffff);
 12.1860 -	}
 12.1861 -
 12.1862 -	spin_unlock (&tp->lock);
 12.1863 -
 12.1864 -	DPRINTK ("%s: exiting interrupt, intr_status=%#4.4x.\n",
 12.1865 -		 dev->name, RTL_R16 (IntrStatus));
 12.1866 -}
 12.1867 -
 12.1868 -
 12.1869 -static int rtl8139_close (struct net_device *dev)
 12.1870 -{
 12.1871 -	struct rtl8139_private *tp = dev->priv;
 12.1872 -	void *ioaddr = tp->mmio_addr;
 12.1873 -	unsigned long flags;
 12.1874 -
 12.1875 -	netif_stop_queue (dev);
 12.1876 -
 12.1877 -	DPRINTK ("%s: Shutting down ethercard, status was 0x%4.4x.\n",
 12.1878 -			dev->name, RTL_R16 (IntrStatus));
 12.1879 -
 12.1880 -	spin_lock_irqsave (&tp->lock, flags);
 12.1881 -
 12.1882 -	/* Stop the chip's Tx and Rx DMA processes. */
 12.1883 -	RTL_W8 (ChipCmd, 0);
 12.1884 -
 12.1885 -	/* Disable interrupts by clearing the interrupt mask. */
 12.1886 -	RTL_W16 (IntrMask, 0);
 12.1887 -
 12.1888 -	/* Update the error counts. */
 12.1889 -	tp->stats.rx_missed_errors += RTL_R32 (RxMissed);
 12.1890 -	RTL_W32 (RxMissed, 0);
 12.1891 -
 12.1892 -	spin_unlock_irqrestore (&tp->lock, flags);
 12.1893 -
 12.1894 -	synchronize_irq ();
 12.1895 -	free_irq (dev->irq, dev);
 12.1896 -
 12.1897 -	rtl8139_tx_clear (tp);
 12.1898 -
 12.1899 -	pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN,
 12.1900 -			    tp->rx_ring, tp->rx_ring_dma);
 12.1901 -	pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN,
 12.1902 -			    tp->tx_bufs, tp->tx_bufs_dma);
 12.1903 -	tp->rx_ring = NULL;
 12.1904 -	tp->tx_bufs = NULL;
 12.1905 -
 12.1906 -	/* Green! Put the chip in low-power mode. */
 12.1907 -	RTL_W8 (Cfg9346, Cfg9346_Unlock);
 12.1908 -
 12.1909 -	if (rtl_chip_info[tp->chipset].flags & HasHltClk)
 12.1910 -		RTL_W8 (HltClk, 'H');	/* 'R' would leave the clock running. */
 12.1911 -
 12.1912 -	return 0;
 12.1913 -}
 12.1914 -
 12.1915 -
 12.1916 -/* Get the ethtool settings.  Assumes that eset points to kernel
 12.1917 -   memory, *eset has been initialized as {ETHTOOL_GSET}, and other
 12.1918 -   threads or interrupts aren't messing with the 8139.  */
 12.1919 -static void netdev_get_eset (struct net_device *dev, struct ethtool_cmd *eset)
 12.1920 -{
 12.1921 -	struct rtl8139_private *np = dev->priv;
 12.1922 -	void *ioaddr = np->mmio_addr;
 12.1923 -	u16 advert;
 12.1924 -
 12.1925 -	eset->supported = SUPPORTED_10baseT_Half
 12.1926 -		      	| SUPPORTED_10baseT_Full
 12.1927 -		      	| SUPPORTED_100baseT_Half
 12.1928 -		      	| SUPPORTED_100baseT_Full
 12.1929 -		      	| SUPPORTED_Autoneg
 12.1930 -		      	| SUPPORTED_TP;
 12.1931 -
 12.1932 -	eset->advertising = ADVERTISED_TP | ADVERTISED_Autoneg;
 12.1933 -	advert = mdio_read (dev, np->phys[0], 4);
 12.1934 -	if (advert & 0x0020)
 12.1935 -		eset->advertising |= ADVERTISED_10baseT_Half;
 12.1936 -	if (advert & 0x0040)
 12.1937 -		eset->advertising |= ADVERTISED_10baseT_Full;
 12.1938 -	if (advert & 0x0080)
 12.1939 -		eset->advertising |= ADVERTISED_100baseT_Half;
 12.1940 -	if (advert & 0x0100)
 12.1941 -		eset->advertising |= ADVERTISED_100baseT_Full;
 12.1942 -
 12.1943 -	eset->speed = (RTL_R8 (MediaStatus) & 0x08) ? 10 : 100;
 12.1944 -	/* (KON)FIXME: np->full_duplex is set or reset by the thread,
 12.1945 -	   which means this always shows half duplex if the interface
 12.1946 -	   isn't up yet, even if it has already autonegotiated.  */
 12.1947 -	eset->duplex = np->full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
 12.1948 -	eset->port = PORT_TP;
 12.1949 -	/* (KON)FIXME: Is np->phys[0] correct?  starfire.c uses that.  */
 12.1950 -	eset->phy_address = np->phys[0];
 12.1951 -	eset->transceiver = XCVR_INTERNAL;
 12.1952 -	eset->autoneg = (mdio_read (dev, np->phys[0], 0) & 0x1000) != 0;
 12.1953 -	eset->maxtxpkt = 1;
 12.1954 -	eset->maxrxpkt = 1;
 12.1955 -}
 12.1956 -
 12.1957 -
 12.1958 -/* Get the ethtool Wake-on-LAN settings.  Assumes that wol points to
 12.1959 -   kernel memory, *wol has been initialized as {ETHTOOL_GWOL}, and
 12.1960 -   other threads or interrupts aren't messing with the 8139.  */
 12.1961 -static void netdev_get_wol (struct net_device *dev, struct ethtool_wolinfo *wol)
 12.1962 -{
 12.1963 -	struct rtl8139_private *np = dev->priv;
 12.1964 -	void *ioaddr = np->mmio_addr;
 12.1965 -
 12.1966 -	if (rtl_chip_info[np->chipset].flags & HasLWake) {
 12.1967 -		u8 cfg3 = RTL_R8 (Config3);
 12.1968 -		u8 cfg5 = RTL_R8 (Config5);
 12.1969 -
 12.1970 -		wol->supported = WAKE_PHY | WAKE_MAGIC
 12.1971 -			| WAKE_UCAST | WAKE_MCAST | WAKE_BCAST;
 12.1972 -
 12.1973 -		wol->wolopts = 0;
 12.1974 -		if (cfg3 & Cfg3_LinkUp)
 12.1975 -			wol->wolopts |= WAKE_PHY;
 12.1976 -		if (cfg3 & Cfg3_Magic)
 12.1977 -			wol->wolopts |= WAKE_MAGIC;
 12.1978 -		/* (KON)FIXME: See how netdev_set_wol() handles the
 12.1979 -		   following constants.  */
 12.1980 -		if (cfg5 & Cfg5_UWF)
 12.1981 -			wol->wolopts |= WAKE_UCAST;
 12.1982 -		if (cfg5 & Cfg5_MWF)
 12.1983 -			wol->wolopts |= WAKE_MCAST;
 12.1984 -		if (cfg5 & Cfg5_BWF)
 12.1985 -			wol->wolopts |= WAKE_BCAST;
 12.1986 -	}
 12.1987 -}
 12.1988 -
 12.1989 -
 12.1990 -/* Set the ethtool Wake-on-LAN settings.  Return 0 or -errno.  Assumes
 12.1991 -   that wol points to kernel memory and other threads or interrupts
 12.1992 -   aren't messing with the 8139.  */
 12.1993 -static int netdev_set_wol (struct net_device *dev,
 12.1994 -			   const struct ethtool_wolinfo *wol)
 12.1995 -{
 12.1996 -	struct rtl8139_private *np = dev->priv;
 12.1997 -	void *ioaddr = np->mmio_addr;
 12.1998 -	u32 support;
 12.1999 -	u8 cfg3, cfg5;
 12.2000 -
 12.2001 -	support = ((rtl_chip_info[np->chipset].flags & HasLWake)
 12.2002 -		   ? (WAKE_PHY | WAKE_MAGIC
 12.2003 -		      | WAKE_UCAST | WAKE_MCAST | WAKE_BCAST)
 12.2004 -		   : 0);
 12.2005 -	if (wol->wolopts & ~support)
 12.2006 -		return -EINVAL;
 12.2007 -
 12.2008 -	cfg3 = RTL_R8 (Config3) & ~(Cfg3_LinkUp | Cfg3_Magic);
 12.2009 -	if (wol->wolopts & WAKE_PHY)
 12.2010 -		cfg3 |= Cfg3_LinkUp;
 12.2011 -	if (wol->wolopts & WAKE_MAGIC)
 12.2012 -		cfg3 |= Cfg3_Magic;
 12.2013 -	RTL_W8 (Cfg9346, Cfg9346_Unlock);
 12.2014 -	RTL_W8 (Config3, cfg3);
 12.2015 -	RTL_W8 (Cfg9346, Cfg9346_Lock);
 12.2016 -
 12.2017 -	cfg5 = RTL_R8 (Config5) & ~(Cfg5_UWF | Cfg5_MWF | Cfg5_BWF);
 12.2018 -	/* (KON)FIXME: These are untested.  We may have to set the
 12.2019 -	   CRC0, Wakeup0 and LSBCRC0 registers too, but I have no
 12.2020 -	   documentation.  */
 12.2021 -	if (wol->wolopts & WAKE_UCAST)
 12.2022 -		cfg5 |= Cfg5_UWF;
 12.2023 -	if (wol->wolopts & WAKE_MCAST)
 12.2024 -		cfg5 |= Cfg5_MWF;
 12.2025 -	if (wol->wolopts & WAKE_BCAST)
 12.2026 -		cfg5 |= Cfg5_BWF;
 12.2027 -	RTL_W8 (Config5, cfg5);	/* need not unlock via Cfg9346 */
 12.2028 -
 12.2029 -	return 0;
 12.2030 -}
 12.2031 -
 12.2032 -
 12.2033 -static int netdev_ethtool_ioctl (struct net_device *dev, void *useraddr)
 12.2034 -{
 12.2035 -	struct rtl8139_private *np = dev->priv;
 12.2036 -	u32 ethcmd;
 12.2037 -
 12.2038 -	/* dev_ioctl() in ../../net/core/dev.c has already checked
 12.2039 -	   capable(CAP_NET_ADMIN), so don't bother with that here.  */
 12.2040 -
 12.2041 -	if (copy_from_user (&ethcmd, useraddr, sizeof (ethcmd)))
 12.2042 -		return -EFAULT;
 12.2043 -
 12.2044 -	switch (ethcmd) {
 12.2045 -	case ETHTOOL_GSET:
 12.2046 -		{
 12.2047 -			struct ethtool_cmd eset = { ETHTOOL_GSET };
 12.2048 -			spin_lock_irq (&np->lock);
 12.2049 -			netdev_get_eset (dev, &eset);
 12.2050 -			spin_unlock_irq (&np->lock);
 12.2051 -			if (copy_to_user (useraddr, &eset, sizeof (eset)))
 12.2052 -				return -EFAULT;
 12.2053 -			return 0;
 12.2054 -		}
 12.2055 -
 12.2056 -	/* TODO: ETHTOOL_SSET */
 12.2057 -
 12.2058 -	case ETHTOOL_GDRVINFO:
 12.2059 -		{
 12.2060 -			struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO };
 12.2061 -			strcpy (info.driver, DRV_NAME);
 12.2062 -			strcpy (info.version, DRV_VERSION);
 12.2063 -			strcpy (info.bus_info, np->pci_dev->slot_name);
 12.2064 -			if (copy_to_user (useraddr, &info, sizeof (info)))
 12.2065 -				return -EFAULT;
 12.2066 -			return 0;
 12.2067 -		}
 12.2068 -
 12.2069 -	case ETHTOOL_GWOL:
 12.2070 -		{
 12.2071 -			struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
 12.2072 -			spin_lock_irq (&np->lock);
 12.2073 -			netdev_get_wol (dev, &wol);
 12.2074 -			spin_unlock_irq (&np->lock);
 12.2075 -			if (copy_to_user (useraddr, &wol, sizeof (wol)))
 12.2076 -				return -EFAULT;
 12.2077 -			return 0;
 12.2078 -		}
 12.2079 -
 12.2080 -	case ETHTOOL_SWOL:
 12.2081 -		{
 12.2082 -			struct ethtool_wolinfo wol;
 12.2083 -			int rc;
 12.2084 -			if (copy_from_user (&wol, useraddr, sizeof (wol)))
 12.2085 -				return -EFAULT;
 12.2086 -			spin_lock_irq (&np->lock);
 12.2087 -			rc = netdev_set_wol (dev, &wol);
 12.2088 -			spin_unlock_irq (&np->lock);
 12.2089 -			return rc;
 12.2090 -		}
 12.2091 -
 12.2092 -	default:
 12.2093 -		break;
 12.2094 -	}
 12.2095 -
 12.2096 -	return -EOPNOTSUPP;
 12.2097 -}
 12.2098 -
 12.2099 -
 12.2100 -static int netdev_ioctl (struct net_device *dev, struct ifreq *rq, int cmd)
 12.2101 -{
 12.2102 -	struct rtl8139_private *tp = dev->priv;
 12.2103 -	struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data;
 12.2104 -	int rc = 0;
 12.2105 -	int phy = tp->phys[0] & 0x3f;
 12.2106 -
 12.2107 -	if (cmd != SIOCETHTOOL) {
 12.2108 -		/* With SIOCETHTOOL, this would corrupt the pointer.  */
 12.2109 -		data->phy_id &= 0x1f;
 12.2110 -		data->reg_num &= 0x1f;
 12.2111 -	}
 12.2112 -
 12.2113 -	switch (cmd) {
 12.2114 -	case SIOCETHTOOL:
 12.2115 -		return netdev_ethtool_ioctl(dev, (void *) rq->ifr_data);
 12.2116 -
 12.2117 -	case SIOCGMIIPHY:	/* Get the address of the PHY in use. */
 12.2118 -	case SIOCDEVPRIVATE:	/* binary compat, remove in 2.5 */
 12.2119 -		data->phy_id = phy;
 12.2120 -		/* Fall Through */
 12.2121 -
 12.2122 -	case SIOCGMIIREG:	/* Read the specified MII register. */
 12.2123 -	case SIOCDEVPRIVATE+1:	/* binary compat, remove in 2.5 */
 12.2124 -		data->val_out = mdio_read (dev, data->phy_id, data->reg_num);
 12.2125 -		break;
 12.2126 -
 12.2127 -	case SIOCSMIIREG:	/* Write the specified MII register */
 12.2128 -	case SIOCDEVPRIVATE+2:	/* binary compat, remove in 2.5 */
 12.2129 -		if (!capable (CAP_NET_ADMIN)) {
 12.2130 -			rc = -EPERM;
 12.2131 -			break;
 12.2132 -		}
 12.2133 -
 12.2134 -		if (data->phy_id == phy) {
 12.2135 -			u16 value = data->val_in;
 12.2136 -			switch (data->reg_num) {
 12.2137 -			case 0:
 12.2138 -				/* Check for autonegotiation on or reset. */
 12.2139 -				tp->medialock = (value & 0x9000) ? 0 : 1;
 12.2140 -				if (tp->medialock)
 12.2141 -					tp->full_duplex = (value & 0x0100) ? 1 : 0;
 12.2142 -				break;
 12.2143 -			case 4: /* tp->advertising = value; */ break;
 12.2144 -			}
 12.2145 -		}
 12.2146 -		mdio_write(dev, data->phy_id, data->reg_num, data->val_in);
 12.2147 -		break;
 12.2148 -
 12.2149 -	default:
 12.2150 -		rc = -EOPNOTSUPP;
 12.2151 -		break;
 12.2152 -	}
 12.2153 -
 12.2154 -	return rc;
 12.2155 -}
 12.2156 -
 12.2157 -
 12.2158 -static struct net_device_stats *rtl8139_get_stats (struct net_device *dev)
 12.2159 -{
 12.2160 -	struct rtl8139_private *tp = dev->priv;
 12.2161 -	void *ioaddr = tp->mmio_addr;
 12.2162 -	unsigned long flags;
 12.2163 -
 12.2164 -	if (netif_running(dev)) {
 12.2165 -		spin_lock_irqsave (&tp->lock, flags);
 12.2166 -		tp->stats.rx_missed_errors += RTL_R32 (RxMissed);
 12.2167 -		RTL_W32 (RxMissed, 0);
 12.2168 -		spin_unlock_irqrestore (&tp->lock, flags);
 12.2169 -	}
 12.2170 -
 12.2171 -	return &tp->stats;
 12.2172 -}
 12.2173 -
 12.2174 -/* Set or clear the multicast filter for this adaptor.
 12.2175 -   This routine is not state sensitive and need not be SMP locked. */
 12.2176 -
 12.2177 -static unsigned const ethernet_polynomial = 0x04c11db7U;
 12.2178 -static inline u32 ether_crc (int length, unsigned char *data)
 12.2179 -{
 12.2180 -	int crc = -1;
 12.2181 -
 12.2182 -	while (--length >= 0) {
 12.2183 -		unsigned char current_octet = *data++;
 12.2184 -		int bit;
 12.2185 -		for (bit = 0; bit < 8; bit++, current_octet >>= 1)
 12.2186 -			crc = (crc << 1) ^ ((crc < 0) ^ (current_octet & 1) ?
 12.2187 -			     ethernet_polynomial : 0);
 12.2188 -	}
 12.2189 -
 12.2190 -	return crc;
 12.2191 -}
 12.2192 -
 12.2193 -
 12.2194 -static void __set_rx_mode (struct net_device *dev)
 12.2195 -{
 12.2196 -	struct rtl8139_private *tp = dev->priv;
 12.2197 -	void *ioaddr = tp->mmio_addr;
 12.2198 -	u32 mc_filter[2];	/* Multicast hash filter */
 12.2199 -	int i, rx_mode;
 12.2200 -	u32 tmp;
 12.2201 -
 12.2202 -	DPRINTK ("%s:   rtl8139_set_rx_mode(%4.4x) done -- Rx config %8.8lx.\n",
 12.2203 -			dev->name, dev->flags, RTL_R32 (RxConfig));
 12.2204 -
 12.2205 -	/* Note: do not reorder, GCC is clever about common statements. */
 12.2206 -	if (dev->flags & IFF_PROMISC) {
 12.2207 -		/* Unconditionally log net taps. */
 12.2208 -		printk (KERN_NOTICE "%s: Promiscuous mode enabled.\n",
 12.2209 -			dev->name);
 12.2210 -		rx_mode =
 12.2211 -		    AcceptBroadcast | AcceptMulticast | AcceptMyPhys |
 12.2212 -		    AcceptAllPhys;
 12.2213 -		mc_filter[1] = mc_filter[0] = 0xffffffff;
 12.2214 -	} else if ((dev->mc_count > multicast_filter_limit)
 12.2215 -		   || (dev->flags & IFF_ALLMULTI)) {
 12.2216 -		/* Too many to filter perfectly -- accept all multicasts. */
 12.2217 -		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 12.2218 -		mc_filter[1] = mc_filter[0] = 0xffffffff;
 12.2219 -	} else {
 12.2220 -		struct dev_mc_list *mclist;
 12.2221 -		rx_mode = AcceptBroadcast | AcceptMyPhys;
 12.2222 -		mc_filter[1] = mc_filter[0] = 0;
 12.2223 -		for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
 12.2224 -		     i++, mclist = mclist->next) {
 12.2225 -			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
 12.2226 -
 12.2227 -			mc_filter[bit_nr >> 5] |= cpu_to_le32(1 << (bit_nr & 31));
 12.2228 -			rx_mode |= AcceptMulticast;
 12.2229 -		}
 12.2230 -	}
 12.2231 -
 12.2232 -	/* We can safely update without stopping the chip. */
 12.2233 -	tmp = rtl8139_rx_config | rx_mode;
 12.2234 -	if (tp->rx_config != tmp) {
 12.2235 -		RTL_W32_F (RxConfig, tmp);
 12.2236 -		tp->rx_config = tmp;
 12.2237 -	}
 12.2238 -	RTL_W32_F (MAR0 + 0, mc_filter[0]);
 12.2239 -	RTL_W32_F (MAR0 + 4, mc_filter[1]);
 12.2240 -}
 12.2241 -
 12.2242 -static void rtl8139_set_rx_mode (struct net_device *dev)
 12.2243 -{
 12.2244 -	unsigned long flags;
 12.2245 -	struct rtl8139_private *tp = dev->priv;
 12.2246 -
 12.2247 -	spin_lock_irqsave (&tp->lock, flags);
 12.2248 -	__set_rx_mode(dev);
 12.2249 -	spin_unlock_irqrestore (&tp->lock, flags);
 12.2250 -}
 12.2251 -
 12.2252 -#ifdef CONFIG_PM
 12.2253 -
 12.2254 -static int rtl8139_suspend (struct pci_dev *pdev, u32 state)
 12.2255 -{
 12.2256 -	struct net_device *dev = pci_get_drvdata (pdev);
 12.2257 -	struct rtl8139_private *tp = dev->priv;
 12.2258 -	void *ioaddr = tp->mmio_addr;
 12.2259 -	unsigned long flags;
 12.2260 -
 12.2261 -	if (!netif_running (dev))
 12.2262 -		return 0;
 12.2263 -
 12.2264 -	netif_device_detach (dev);
 12.2265 -
 12.2266 -	spin_lock_irqsave (&tp->lock, flags);
 12.2267 -
 12.2268 -	/* Disable interrupts, stop Tx and Rx. */
 12.2269 -	RTL_W16 (IntrMask, 0);
 12.2270 -	RTL_W8 (ChipCmd, 0);
 12.2271 -
 12.2272 -	/* Update the error counts. */
 12.2273 -	tp->stats.rx_missed_errors += RTL_R32 (RxMissed);
 12.2274 -	RTL_W32 (RxMissed, 0);
 12.2275 -
 12.2276 -	spin_unlock_irqrestore (&tp->lock, flags);
 12.2277 -	return 0;
 12.2278 -}
 12.2279 -
 12.2280 -
 12.2281 -static int rtl8139_resume (struct pci_dev *pdev)
 12.2282 -{
 12.2283 -	struct net_device *dev = pci_get_drvdata (pdev);
 12.2284 -
 12.2285 -	if (!netif_running (dev))
 12.2286 -		return 0;
 12.2287 -	netif_device_attach (dev);
 12.2288 -	rtl8139_hw_start (dev);
 12.2289 -	return 0;
 12.2290 -}
 12.2291 -
 12.2292 -#endif /* CONFIG_PM */
 12.2293 -
 12.2294 -
 12.2295 -static struct pci_driver rtl8139_pci_driver = {
 12.2296 -	name:		DRV_NAME,
 12.2297 -	id_table:	rtl8139_pci_tbl,
 12.2298 -	probe:		rtl8139_init_one,
 12.2299 -	remove:		__devexit_p(rtl8139_remove_one),
 12.2300 -#ifdef CONFIG_PM
 12.2301 -	suspend:	rtl8139_suspend,
 12.2302 -	resume:		rtl8139_resume,
 12.2303 -#endif /* CONFIG_PM */
 12.2304 -};
 12.2305 -
 12.2306 -
 12.2307 -static int __init rtl8139_init_module (void)
 12.2308 -{
 12.2309 -	/* when we're a module, we always print a version message,
 12.2310 -	 * even if no 8139 board is found.
 12.2311 -	 */
 12.2312 -#ifdef MODULE
 12.2313 -	printk (KERN_INFO RTL8139_DRIVER_NAME "\n");
 12.2314 -#endif
 12.2315 -
 12.2316 -	return pci_module_init (&rtl8139_pci_driver);
 12.2317 -}
 12.2318 -
 12.2319 -
 12.2320 -static void __exit rtl8139_cleanup_module (void)
 12.2321 -{
 12.2322 -	pci_unregister_driver (&rtl8139_pci_driver);
 12.2323 -}
 12.2324 -
 12.2325 -
 12.2326 -module_init(rtl8139_init_module);
 12.2327 -module_exit(rtl8139_cleanup_module);
    13.1 --- a/xen-2.4.16/drivers/net/eepro100.c	Fri Feb 14 13:18:19 2003 +0000
    13.2 +++ b/xen-2.4.16/drivers/net/eepro100.c	Fri Feb 14 14:27:45 2003 +0000
    13.3 @@ -43,12 +43,15 @@ static int rxdmacount /* = 0 */;
    13.4  
    13.5  /* Set the copy breakpoint for the copy-only-tiny-buffer Rx method.
    13.6     Lower values use more memory, but are faster. */
    13.7 -#if defined(__alpha__) || defined(__sparc__) || defined(__mips__) || \
    13.8 +/*#if defined(__alpha__) || defined(__sparc__) || defined(__mips__) || \
    13.9      defined(__arm__)
   13.10  static int rx_copybreak = 1518;
   13.11  #else
   13.12  static int rx_copybreak = 200;
   13.13 -#endif
   13.14 +#endif*/
   13.15 +
   13.16 +/* Xen doesn't do rx_copybreak in drivers. */
   13.17 +static int rx_copybreak = 0;
   13.18  
   13.19  /* Maximum events (Rx packets, etc.) to handle at each interrupt. */
   13.20  static int max_interrupt_work = 20;
    14.1 --- a/xen-2.4.16/drivers/net/pcnet32.c	Fri Feb 14 13:18:19 2003 +0000
    14.2 +++ b/xen-2.4.16/drivers/net/pcnet32.c	Fri Feb 14 14:27:45 2003 +0000
    14.3 @@ -66,7 +66,7 @@ static int tx_start = 1; /* Mapping -- 0
    14.4  static struct net_device *pcnet32_dev;
    14.5  
    14.6  static const int max_interrupt_work = 80;
    14.7 -static const int rx_copybreak = 200;
    14.8 +static const int rx_copybreak = 0; /* 200; Xen doesn't do in-driver copybreak. */
    14.9  
   14.10  #define PORT_AUI      0x00
   14.11  #define PORT_10BT     0x01
    15.1 --- a/xen-2.4.16/drivers/net/tg3.c	Fri Feb 14 13:18:19 2003 +0000
    15.2 +++ b/xen-2.4.16/drivers/net/tg3.c	Fri Feb 14 14:27:45 2003 +0000
    15.3 @@ -4114,14 +4114,16 @@ static int tg3_reset_hw(struct tg3 *tp)
    15.4  		udelay(10);
    15.5  	}
    15.6  
    15.7 -	tw32(HOSTCC_RXCOL_TICKS, 0);
    15.8 -	tw32(HOSTCC_RXMAX_FRAMES, 1);
    15.9 -	tw32(HOSTCC_RXCOAL_TICK_INT, 0);
   15.10 -	tw32(HOSTCC_RXCOAL_MAXF_INT, 1);
   15.11 -	tw32(HOSTCC_TXCOL_TICKS, LOW_TXCOL_TICKS);
   15.12 -	tw32(HOSTCC_TXMAX_FRAMES, LOW_RXMAX_FRAMES);
   15.13 -	tw32(HOSTCC_TXCOAL_TICK_INT, 0);
   15.14 -	tw32(HOSTCC_TXCOAL_MAXF_INT, 0);
   15.15 +        // akw: I have set these all back to default coalescing values.
   15.16 +        
   15.17 +	tw32(HOSTCC_RXCOL_TICKS, DEFAULT_RXCOL_TICKS); //0);
   15.18 +	tw32(HOSTCC_RXMAX_FRAMES, DEFAULT_RXMAX_FRAMES); //1);
   15.19 +	tw32(HOSTCC_RXCOAL_TICK_INT, DEFAULT_RXCOAL_TICK_INT); //, 0);
   15.20 +	tw32(HOSTCC_RXCOAL_MAXF_INT, DEFAULT_RXCOAL_MAXF_INT); //, 1);
   15.21 +	tw32(HOSTCC_TXCOL_TICKS, DEFAULT_TXCOL_TICKS); //, LOW_TXCOL_TICKS);
   15.22 +	tw32(HOSTCC_TXMAX_FRAMES, DEFAULT_TXMAX_FRAMES); //, LOW_RXMAX_FRAMES);
   15.23 +	tw32(HOSTCC_TXCOAL_TICK_INT, DEFAULT_TXCOAL_TICK_INT); //, 0);
   15.24 +	tw32(HOSTCC_TXCOAL_MAXF_INT, DEFAULT_TXCOAL_MAXF_INT); //, 0);
   15.25  	tw32(HOSTCC_STAT_COAL_TICKS,
   15.26  	     DEFAULT_STAT_COAL_TICKS);
   15.27  
   15.28 @@ -6185,9 +6187,11 @@ static int __devinit tg3_get_invariants(
   15.29  	}
   15.30  
   15.31  	tp->rx_offset = 2;
   15.32 +
   15.33  	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701 &&
   15.34  	    (tp->tg3_flags & TG3_FLAG_PCIX_MODE) != 0)
   15.35 -		tp->rx_offset = 0;
   15.36 +                printk("WARNING: This card may not support unaligned receive pointers.\n");
   15.37 +		//tp->rx_offset = 0;
   15.38  
   15.39  	/* By default, disable wake-on-lan.  User can change this
   15.40  	 * using ETHTOOL_SWOL.
    16.1 --- a/xen-2.4.16/drivers/net/tg3.h	Fri Feb 14 13:18:19 2003 +0000
    16.2 +++ b/xen-2.4.16/drivers/net/tg3.h	Fri Feb 14 14:27:45 2003 +0000
    16.3 @@ -21,7 +21,7 @@
    16.4  #define TG3_BDINFO_NIC_ADDR		0xcUL /* 32-bit */
    16.5  #define TG3_BDINFO_SIZE			0x10UL
    16.6  
    16.7 -#define RX_COPY_THRESHOLD  		256
    16.8 +#define RX_COPY_THRESHOLD  		0 //256
    16.9  
   16.10  #define RX_STD_MAX_SIZE			1536
   16.11  #define RX_JUMBO_MAX_SIZE		0xdeadbeef /* XXX */
    17.1 --- a/xen-2.4.16/drivers/net/tulip/interrupt.c	Fri Feb 14 13:18:19 2003 +0000
    17.2 +++ b/xen-2.4.16/drivers/net/tulip/interrupt.c	Fri Feb 14 14:27:45 2003 +0000
    17.3 @@ -172,6 +172,7 @@ static int tulip_rx(struct net_device *d
    17.4  			   to a minimally-sized skbuff. */
    17.5  			if (pkt_len < tulip_rx_copybreak
    17.6  				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
    17.7 +                        //if (0) {
    17.8  				skb->dev = dev;
    17.9  				skb_reserve(skb, 2);	/* 16 byte align the IP header */
   17.10  				pci_dma_sync_single(tp->pdev,
    18.1 --- a/xen-2.4.16/drivers/net/tulip/tulip_core.c	Fri Feb 14 13:18:19 2003 +0000
    18.2 +++ b/xen-2.4.16/drivers/net/tulip/tulip_core.c	Fri Feb 14 14:27:45 2003 +0000
    18.3 @@ -60,13 +60,16 @@ const char * const medianame[32] = {
    18.4  };
    18.5  
    18.6  /* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */
    18.7 -#if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \
    18.8 +/*#if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \
    18.9  	|| defined(__sparc_) || defined(__ia64__) \
   18.10  	|| defined(__sh__) || defined(__mips__)
   18.11  static int rx_copybreak = 1518;
   18.12  #else
   18.13  static int rx_copybreak = 100;
   18.14  #endif
   18.15 +*/
   18.16 +/* Xen doesn't do rx_copybreak in drivers. */
   18.17 +static int rx_copybreak = 0;
   18.18  
   18.19  /*
   18.20    Set the bus performance register.
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/xen-2.4.16/include/asm-i386/flushtlb.h	Fri Feb 14 14:27:45 2003 +0000
    19.3 @@ -0,0 +1,48 @@
    19.4 +/******************************************************************************
    19.5 + * flushtlb.h
    19.6 + * 
    19.7 + * TLB flush macros that count flushes.  Counting is used to enforce 
    19.8 + * zero-copy safety, particularily for the network code.
    19.9 + *
   19.10 + * akw - Jan 21, 2003
   19.11 + */
   19.12 +
   19.13 +#ifndef __FLUSHTLB_H
   19.14 +#define __FLUSHTLB_H
   19.15 +
   19.16 +#include <xeno/smp.h>
   19.17 +
   19.18 +unsigned long tlb_flush_count[NR_CPUS];
   19.19 +//#if 0 
   19.20 +#define __read_cr3(__var)                                               \
   19.21 +    do {                                                                \
   19.22 +                __asm__ __volatile (                                    \
   19.23 +                        "movl %%cr3, %0;"                               \
   19.24 +                        : "=r" (__var));                                \
   19.25 +    } while (0)
   19.26 +//#endif
   19.27 +
   19.28 +#define __write_cr3_counted(__pa)                                       \
   19.29 +    do {                                                                \
   19.30 +                __asm__ __volatile__ (                                  \
   19.31 +                        "movl %0, %%cr3;"                               \
   19.32 +                        :: "r" (__pa)                                    \
   19.33 +                        : "memory");                                    \
   19.34 +                tlb_flush_count[smp_processor_id()]++;                  \
   19.35 +    } while (0)
   19.36 +
   19.37 +//#endif
   19.38 +#define __flush_tlb_counted()                                           \
   19.39 +        do {                                                            \
   19.40 +                unsigned int tmpreg;                                    \
   19.41 +                                                                        \
   19.42 +                __asm__ __volatile__(                                   \
   19.43 +                        "movl %%cr3, %0;  # flush TLB \n"               \
   19.44 +                        "movl %0, %%cr3;                "               \
   19.45 +                        : "=r" (tmpreg)                                \
   19.46 +                        :: "memory");                                   \
   19.47 +                tlb_flush_count[smp_processor_id()]++;                  \
   19.48 +        } while (0)
   19.49 +
   19.50 +#endif
   19.51 +                           
    20.1 --- a/xen-2.4.16/include/asm-i386/page.h	Fri Feb 14 13:18:19 2003 +0000
    20.2 +++ b/xen-2.4.16/include/asm-i386/page.h	Fri Feb 14 14:27:45 2003 +0000
    20.3 @@ -91,36 +91,36 @@ typedef struct { unsigned long pt_lo; } 
    20.4  #include <asm/processor.h>
    20.5  #include <asm/fixmap.h>
    20.6  #include <asm/bitops.h>
    20.7 +#include <asm/flushtlb.h>
    20.8  
    20.9  extern l2_pgentry_t idle0_pg_table[ENTRIES_PER_L2_PAGETABLE];
   20.10  extern l2_pgentry_t *idle_pg_table[NR_CPUS];
   20.11  extern void paging_init(void);
   20.12  
   20.13 -#define __flush_tlb()							\
   20.14 -	do {								\
   20.15 -		unsigned int tmpreg;					\
   20.16 -									\
   20.17 -		__asm__ __volatile__(					\
   20.18 -			"movl %%cr3, %0;  # flush TLB \n"		\
   20.19 -			"movl %0, %%cr3;              \n"		\
   20.20 -			: "=r" (tmpreg)					\
   20.21 -			:: "memory");					\
   20.22 -	} while (0)
   20.23 +#define __flush_tlb() __flush_tlb_counted()
   20.24  
   20.25  /* Flush global pages as well. */
   20.26 +
   20.27 +#define __pge_off()                                                     \
   20.28 +        do {                                                            \
   20.29 +                __asm__ __volatile__(                                   \
   20.30 +                        "movl %0, %%cr4;  # turn off PGE     "          \
   20.31 +                        :: "r" (mmu_cr4_features & ~X86_CR4_PGE));      \
   20.32 +        } while (0)
   20.33 +
   20.34 +#define __pge_on()                                                      \
   20.35 +        do {                                                            \
   20.36 +                __asm__ __volatile__(                                   \
   20.37 +                        "movl %0, %%cr4;  # turn off PGE     "          \
   20.38 +                        :: "r" (mmu_cr4_features));                     \
   20.39 +        } while (0)
   20.40 +
   20.41 +
   20.42  #define __flush_tlb_all()						\
   20.43  	do {								\
   20.44 -		unsigned int tmpreg;					\
   20.45 -									\
   20.46 -		__asm__ __volatile__(					\
   20.47 -			"movl %1, %%cr4;  # turn off PGE     \n"	\
   20.48 -			"movl %%cr3, %0;  # flush TLB        \n"	\
   20.49 -			"movl %0, %%cr3;                     \n"	\
   20.50 -			"movl %2, %%cr4;  # turn PGE back on \n"	\
   20.51 -			: "=&r" (tmpreg)				\
   20.52 -			: "r" (mmu_cr4_features & ~X86_CR4_PGE),	\
   20.53 -			  "r" (mmu_cr4_features)			\
   20.54 -			: "memory");					\
   20.55 +                __pge_off();                                            \
   20.56 +		__flush_tlb_counted();					\
   20.57 +                __pge_on();                                             \
   20.58  	} while (0)
   20.59  
   20.60  #define __flush_tlb_one(__addr) \
    21.1 --- a/xen-2.4.16/include/asm-i386/pci.h	Fri Feb 14 13:18:19 2003 +0000
    21.2 +++ b/xen-2.4.16/include/asm-i386/pci.h	Fri Feb 14 14:27:45 2003 +0000
    21.3 @@ -75,6 +75,7 @@ static inline dma_addr_t pci_map_single(
    21.4  	if (direction == PCI_DMA_NONE)
    21.5  		BUG();
    21.6  	flush_write_buffers();
    21.7 +
    21.8  	return virt_to_bus(ptr);
    21.9  }
   21.10  
    22.1 --- a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h	Fri Feb 14 13:18:19 2003 +0000
    22.2 +++ b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h	Fri Feb 14 14:27:45 2003 +0000
    22.3 @@ -111,11 +111,6 @@ typedef struct
    22.4  #define EVENT_NET_TX   0x20 /* packets for transmission. */
    22.5  #define EVENT_NET_RX   0x40 /* empty buffers for receive. */
    22.6  
    22.7 -/* should these macros and the ones below test for range violation? */
    22.8 -#define EVENT_NET_TX_FOR_VIF(x)    (EVENT_NET_TX << (2 * x))
    22.9 -#define EVENT_NET_RX_FOR_VIF(x)    (EVENT_NET_RX << (2 * x))
   22.10 -
   22.11 -
   22.12  /* Bit offsets, as opposed to the above masks. */
   22.13  #define _EVENT_BLK_TX  0
   22.14  #define _EVENT_BLK_RX  1
   22.15 @@ -125,9 +120,6 @@ typedef struct
   22.16  #define _EVENT_NET_RX  5
   22.17  #define _EVENT_DEBUG   6
   22.18  
   22.19 -#define _EVENT_NET_TX_FOR_VIF(x)    (_EVENT_NET_TX + (2 * x))
   22.20 -#define _EVENT_NET_RX_FOR_VIF(x)    (_EVENT_NET_RX + (2 * x))
   22.21 -
   22.22  
   22.23  /*
   22.24   * NB. We expect that this struct is smaller than a page.
    23.1 --- a/xen-2.4.16/include/hypervisor-ifs/network.h	Fri Feb 14 13:18:19 2003 +0000
    23.2 +++ b/xen-2.4.16/include/hypervisor-ifs/network.h	Fri Feb 14 14:27:45 2003 +0000
    23.3 @@ -15,17 +15,19 @@
    23.4  #include <linux/types.h>
    23.5  
    23.6  typedef struct tx_entry_st {
    23.7 -	unsigned long addr; /* virtual address */
    23.8 -	unsigned long size; /* in bytes */
    23.9 +	unsigned long addr;   /* virtual address */
   23.10 +	unsigned long size;   /* in bytes */
   23.11 +        int           status; /* per descriptor status. */
   23.12  } tx_entry_t;
   23.13  
   23.14  typedef struct rx_entry_st {
   23.15 -	unsigned long addr; /* virtual address */
   23.16 -	unsigned long size; /* in bytes */
   23.17 +	unsigned long addr;   /* virtual address */
   23.18 +	unsigned long size;   /* in bytes */
   23.19 +        int           status; /* per descriptor status. */
   23.20  } rx_entry_t;
   23.21  
   23.22 -#define TX_RING_SIZE 1024
   23.23 -#define RX_RING_SIZE 1024
   23.24 +#define TX_RING_SIZE 256
   23.25 +#define RX_RING_SIZE 256
   23.26  typedef struct net_ring_st {
   23.27      /*
   23.28       * Guest OS places packets into ring at tx_prod.
   23.29 @@ -111,4 +113,12 @@ typedef struct net_rule_ent_st
   23.30  /* Drop a new rule down to the network tables. */
   23.31  int add_net_rule(net_rule_t *rule);
   23.32  
   23.33 +
   23.34 +/* Descriptor status values:
   23.35 + */
   23.36 +
   23.37 +#define RING_STATUS_OK               0  // Everything is gravy.
   23.38 +#define RING_STATUS_ERR_CFU         -1  // Copy from user problems.
   23.39 +#define RING_STATUS_BAD_PAGE        -2  // What they gave us was pure evil.
   23.40 +
   23.41  #endif
    24.1 --- a/xen-2.4.16/include/xeno/mm.h	Fri Feb 14 13:18:19 2003 +0000
    24.2 +++ b/xen-2.4.16/include/xeno/mm.h	Fri Feb 14 14:27:45 2003 +0000
    24.3 @@ -7,6 +7,7 @@
    24.4  #include <asm/desc.h>
    24.5  #include <xeno/list.h>
    24.6  #include <hypervisor-ifs/hypervisor-if.h>
    24.7 +#include <xeno/spinlock.h>
    24.8  
    24.9  /* XXX KAF: These may die eventually, but so many refs in slab.c :((( */
   24.10  
   24.11 @@ -88,7 +89,7 @@ typedef struct pfn_info {
   24.12   * references exist of teh current type. A change in type can only occur
   24.13   * when type_count == 0.
   24.14   */
   24.15 -#define PG_type_mask        (7<<24) /* bits 24-26 */
   24.16 +#define PG_type_mask        (15<<24) /* bits 24-27 */
   24.17  #define PGT_none            (0<<24) /* no special uses of this page */
   24.18  #define PGT_l1_page_table   (1<<24) /* using this page as an L1 page table? */
   24.19  #define PGT_l2_page_table   (2<<24) /* using this page as an L2 page table? */
   24.20 @@ -97,6 +98,7 @@ typedef struct pfn_info {
   24.21  #define PGT_gdt_page        (5<<24) /* using this page in a GDT? */
   24.22  #define PGT_ldt_page        (6<<24) /* using this page in an LDT? */
   24.23  #define PGT_writeable_page  (7<<24) /* has writable mappings of this page? */
   24.24 +#define PGT_net_rx_buf      (8<<24) /* this page has been pirated by the net code. */
   24.25  
   24.26  #define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
   24.27  #define PageSetSlab(page)	set_bit(PG_slab, &(page)->flags)
   24.28 @@ -108,6 +110,7 @@ typedef struct pfn_info {
   24.29  extern frame_table_t * frame_table;
   24.30  extern unsigned long frame_table_size;
   24.31  extern struct list_head free_list;
   24.32 +extern spinlock_t free_list_lock;
   24.33  extern unsigned int free_pfns;
   24.34  extern unsigned long max_page;
   24.35  void init_frametable(unsigned long nr_pages);
    25.1 --- a/xen-2.4.16/include/xeno/skbuff.h	Fri Feb 14 13:18:19 2003 +0000
    25.2 +++ b/xen-2.4.16/include/xeno/skbuff.h	Fri Feb 14 14:27:45 2003 +0000
    25.3 @@ -34,6 +34,10 @@
    25.4  #define VIF_DROP                -3
    25.5  #define VIF_ANY_INTERFACE       -4
    25.6  
    25.7 +//skb_type values:
    25.8 +#define SKB_NORMAL               0
    25.9 +#define SKB_ZERO_COPY            1
   25.10 +
   25.11  #define HAVE_ALLOC_SKB		/* For the drivers to know */
   25.12  #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
   25.13  #define SLAB_SKB 		/* Slabified skbuffs 	   */
   25.14 @@ -187,7 +191,7 @@ struct sk_buff {
   25.15   	unsigned int 	data_len;
   25.16  	unsigned int	csum;			/* Checksum 					*/
   25.17  	unsigned char 	__unused,		/* Dead field, may be reused			*/
   25.18 -			cloned, 		/* head may be cloned (check refcnt to be sure). */
   25.19 +			cloned, 		/* head may be cloned (check refcnt to be sure) */
   25.20    			pkt_type,		/* Packet class					*/
   25.21    			ip_summed;		/* Driver fed us an IP checksum			*/
   25.22  	__u32		priority;		/* Packet queueing priority			*/
   25.23 @@ -203,8 +207,12 @@ struct sk_buff {
   25.24  
   25.25  	void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
   25.26  
   25.27 -        int src_vif;                            /* vif we came from */
   25.28 -        int dst_vif;                            /* vif we are bound for */
   25.29 +        unsigned int    skb_type;               /* SKB_NORMAL or SKB_ZERO_COPY                  */
   25.30 +        struct pfn_info *pf;                    /* record of physical pf address for freeing    */
   25.31 +        int src_vif;                            /* vif we came from                             */
   25.32 +        int dst_vif;                            /* vif we are bound for                         */
   25.33 +        struct skb_shared_info shinfo;          /* shared info is no longer shared in Xen.      */
   25.34 +        
   25.35  
   25.36                  
   25.37          
   25.38 @@ -244,6 +252,7 @@ struct sk_buff {
   25.39  
   25.40  extern void			__kfree_skb(struct sk_buff *skb);
   25.41  extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
   25.42 +extern struct sk_buff *         alloc_zc_skb(unsigned int size, int priority);
   25.43  extern void			kfree_skbmem(struct sk_buff *skb);
   25.44  extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
   25.45  extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
   25.46 @@ -259,7 +268,8 @@ extern void	skb_over_panic(struct sk_buf
   25.47  extern void	skb_under_panic(struct sk_buff *skb, int len, void *here);
   25.48  
   25.49  /* Internal */
   25.50 -#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
   25.51 +//#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
   25.52 +#define skb_shinfo(SKB)     ((struct skb_shared_info *)(&(SKB)->shinfo))
   25.53  
   25.54  /**
   25.55   *	skb_queue_empty - check if a queue is empty
   25.56 @@ -1045,7 +1055,8 @@ static inline struct sk_buff *__dev_allo
   25.57  {
   25.58  	struct sk_buff *skb;
   25.59  
   25.60 -	skb = alloc_skb(length+16, gfp_mask);
   25.61 +	//skb = alloc_skb(length+16, gfp_mask);
   25.62 +        skb = alloc_zc_skb(length+16, gfp_mask);
   25.63  	if (skb)
   25.64  		skb_reserve(skb,16);
   25.65  	return skb;
    26.1 --- a/xen-2.4.16/include/xeno/vif.h	Fri Feb 14 13:18:19 2003 +0000
    26.2 +++ b/xen-2.4.16/include/xeno/vif.h	Fri Feb 14 14:27:45 2003 +0000
    26.3 @@ -18,9 +18,37 @@
    26.4  #include <hypervisor-ifs/network.h>
    26.5  #include <xeno/skbuff.h>
    26.6  
    26.7 +/* 
    26.8 + * shadow ring structures are used to protect the descriptors from
    26.9 + * tampering after they have been passed to the hypervisor.
   26.10 + *
   26.11 + * TX_RING_SIZE and RX_RING_SIZE are defined in the shared network.h.
   26.12 + */
   26.13 +
   26.14 +typedef struct tx_shadow_entry_st {
   26.15 +    unsigned long addr;
   26.16 +    unsigned long size;
   26.17 +    int           status;
   26.18 +    unsigned long flush_count;
   26.19 +} tx_shadow_entry_t;
   26.20 +
   26.21 +typedef struct rx_shadow_entry_st {
   26.22 +    unsigned long addr;
   26.23 +    unsigned long size;
   26.24 +    int           status;
   26.25 +    unsigned long flush_count;
   26.26 +} rx_shadow_entry_t;
   26.27 +
   26.28 +typedef struct net_shadow_ring_st {
   26.29 +    tx_shadow_entry_t *tx_ring;
   26.30 +    rx_shadow_entry_t *rx_ring;
   26.31 +    unsigned int rx_prod, rx_cons, rx_idx;
   26.32 +} net_shadow_ring_t;
   26.33 +
   26.34  typedef struct net_vif_st {
   26.35 -    net_ring_t  *net_ring;
   26.36 -    int          id;
   26.37 +    net_ring_t          *net_ring;
   26.38 +    net_shadow_ring_t   *shadow_ring;
   26.39 +    int                 id;
   26.40      struct sk_buff_head skb_list;
   26.41      unsigned int domain;
   26.42      // rules table goes here in next revision.
   26.43 @@ -38,5 +66,11 @@ extern net_vif_t *sys_vif_list[];
   26.44  net_vif_t *create_net_vif(int domain);
   26.45  void destroy_net_vif(struct task_struct *p);
   26.46  void add_default_net_rule(int vif_id, u32 ipaddr);
   26.47 -int net_get_target_vif(struct sk_buff *skb);
   26.48 +int __net_get_target_vif(u8 *data, unsigned int len, int src_vif);
   26.49  void add_default_net_rule(int vif_id, u32 ipaddr);
   26.50 +
   26.51 +#define net_get_target_vif(skb) __net_get_target_vif(skb->data, skb->len, skb->src_vif)
   26.52 +/* status fields per-descriptor:
   26.53 + */
   26.54 +
   26.55 +
    27.1 --- a/xen-2.4.16/net/dev.c	Fri Feb 14 13:18:19 2003 +0000
    27.2 +++ b/xen-2.4.16/net/dev.c	Fri Feb 14 14:27:45 2003 +0000
    27.3 @@ -30,6 +30,8 @@
    27.4  #include <linux/pkt_sched.h>
    27.5  
    27.6  #include <linux/event.h>
    27.7 +#include <asm/domain_page.h>
    27.8 +#include <asm/pgalloc.h>
    27.9  
   27.10  #define BUG_TRAP ASSERT
   27.11  #define notifier_call_chain(_a,_b,_c) ((void)0)
   27.12 @@ -38,6 +40,18 @@
   27.13  #define rtnl_unlock() ((void)0)
   27.14  #define dst_init() ((void)0)
   27.15  
   27.16 +#if 1
   27.17 +#define DPRINTK(_f, _a...) printk(_f , ## _a)
   27.18 +#else 
   27.19 +#define DPRINTK(_f, _a...) ((void)0)
   27.20 +#endif
   27.21 +
   27.22 +// Ring defines:
   27.23 +#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
   27.24 +#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
   27.25 +#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
   27.26 +#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
   27.27 +
   27.28  struct net_device *the_dev = NULL;
   27.29  
   27.30  /*
   27.31 @@ -47,11 +61,11 @@ struct net_device *the_dev = NULL;
   27.32  struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
   27.33  
   27.34  
   27.35 -/*****************************************************************************************
   27.36 +/*********************************************************************************
   27.37  
   27.38  			    Device Interface Subroutines
   27.39  
   27.40 -******************************************************************************************/
   27.41 +**********************************************************************************/
   27.42  
   27.43  /**
   27.44   *	__dev_get_by_name	- find a device by its name 
   27.45 @@ -493,7 +507,7 @@ int dev_queue_xmit(struct sk_buff *skb)
   27.46  {
   27.47  	struct net_device *dev = skb->dev;
   27.48  	struct Qdisc  *q;
   27.49 -
   27.50 +if (!(dev->features&NETIF_F_SG)) printk("NIC doesn't do SG!!!\n");
   27.51  	if (skb_shinfo(skb)->frag_list &&
   27.52  	    !(dev->features&NETIF_F_FRAGLIST) &&
   27.53  	    skb_linearize(skb, GFP_ATOMIC) != 0) {
   27.54 @@ -661,7 +675,80 @@ static void get_sample_stats(int cpu)
   27.55  	softnet_data[cpu].avg_blog = avg_blog;
   27.56  }
   27.57  
   27.58 +void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
   27.59 +{
   27.60 +        net_shadow_ring_t *shadow_ring;
   27.61 +        rx_shadow_entry_t *rx;
   27.62 +        unsigned long *g_pte; //tmp
   27.63 +        struct pfn_info *g_pfn, *h_pfn;
   27.64 +        unsigned int i; //, nvif;
   27.65  
   27.66 +        
   27.67 +        
   27.68 +        /*
   27.69 +         * Write the virtual MAC address into the destination field
   27.70 +         * of the ethernet packet. Furthermore, do the same for ARP
   27.71 +         * reply packets. This is easy because the virtual MAC address
   27.72 +         * is always 00-[nn]-00-00-00-00, where the second sixteen bits 
   27.73 +         * of the MAC are the vif's id.  This is to differentiate between
   27.74 +         * vifs on guests that have more than one.
   27.75 +         *
   27.76 +         * In zero copy, the data pointers for the packet have to have been 
   27.77 +         * mapped in by the caller.
   27.78 +         */
   27.79 +
   27.80 +        memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
   27.81 +//        *(unsigned int *)(skb->mac.ethernet->h_dest + 1) = nvif;
   27.82 +        if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
   27.83 +        {
   27.84 +            memset(skb->nh.raw + 18, 0, ETH_ALEN);
   27.85 +//            *(unsigned int *)(skb->nh.raw + 18 + 1) = nvif;
   27.86 +        }
   27.87 +        shadow_ring = vif->shadow_ring;
   27.88 +
   27.89 +        //Advance to next good buffer.
   27.90 +        for (i = shadow_ring->rx_cons; 
   27.91 +             (i != shadow_ring->rx_prod) 
   27.92 +             && ( shadow_ring->rx_ring[i].status != RING_STATUS_OK );
   27.93 +             i = RX_RING_INC(i));
   27.94 +            
   27.95 +        if (( i != shadow_ring->rx_prod ) &&
   27.96 +            ( shadow_ring->rx_ring[i].status == RING_STATUS_OK ))
   27.97 +        {
   27.98 +            rx = shadow_ring->rx_ring+i;
   27.99 +            if ( (skb->len + ETH_HLEN) < rx->size )
  27.100 +                rx->size = skb->len + ETH_HLEN;
  27.101 +            
  27.102 +            g_pte = map_domain_mem(rx->addr);
  27.103 +
  27.104 +            g_pfn =  frame_table + (*g_pte >> PAGE_SHIFT);
  27.105 +            h_pfn = skb->pf;
  27.106 +
  27.107 +            h_pfn->tot_count = h_pfn->type_count = 1;
  27.108 +            g_pfn->tot_count = g_pfn->type_count = 0;
  27.109 +            h_pfn->flags = g_pfn->flags & (~PG_type_mask);
  27.110 +
  27.111 +            if (*g_pte & _PAGE_RW) h_pfn->flags |= PGT_writeable_page;
  27.112 +            g_pfn->flags = 0;
  27.113 +            
  27.114 +            //point guest pte at the new page:
  27.115 +            machine_to_phys_mapping[h_pfn - frame_table] 
  27.116 +                    = machine_to_phys_mapping[g_pfn - frame_table];
  27.117 +
  27.118 +            *g_pte = (*g_pte & ~PAGE_MASK) 
  27.119 +                | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
  27.120 +            *g_pte |= _PAGE_PRESENT;
  27.121 +                
  27.122 +            unmap_domain_mem(g_pte);
  27.123 +            skb->pf = g_pfn; // return the guest pfn to be put on the free list
  27.124 +                
  27.125 +            shadow_ring->rx_cons = RX_RING_INC(i);
  27.126 +        }
  27.127 +}
  27.128 +
  27.129 +/* Deliver skb to an old protocol, which is not threaded well
  27.130 +   or which do not understand shared skbs.
  27.131 + */
  27.132  /**
  27.133   *	netif_rx	-	post buffer to the network code
  27.134   *	@skb: buffer to post
  27.135 @@ -686,32 +773,56 @@ int netif_rx(struct sk_buff *skb)
  27.136  #ifdef CONFIG_SMP
  27.137          unsigned long cpu_mask;
  27.138  #endif
  27.139 +        
  27.140          struct task_struct *p;
  27.141  	int this_cpu = smp_processor_id();
  27.142  	struct softnet_data *queue;
  27.143  	unsigned long flags;
  27.144          net_vif_t *vif;
  27.145  
  27.146 +	local_irq_save(flags);
  27.147 +
  27.148 +        if (skb->skb_type != SKB_ZERO_COPY) 
  27.149 +            BUG();
  27.150 +                
  27.151  	if (skb->stamp.tv_sec == 0)
  27.152 -		get_fast_time(&skb->stamp);
  27.153 +	    get_fast_time(&skb->stamp);
  27.154 +
  27.155 +        if ( (skb->data - skb->head) != (18 + ETH_HLEN) )
  27.156 +            printk("headroom was %lu!\n", (unsigned long)skb->data - (unsigned long)skb->head);
  27.157 +        //    BUG();
  27.158 +        
  27.159 +        skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
  27.160  
  27.161 +        /* remapping this address really screws up all the skb pointers.  We need 
  27.162 +        * to map them all here sufficiently to get the packet demultiplexed.
  27.163 +        */
  27.164 +                
  27.165 +        skb->data = skb->head;
  27.166 +        skb_reserve(skb,18); // 18 is the 16 from dev_alloc_skb plus 2 for #
  27.167 +                             // IP header alignment. 
  27.168 +        skb->mac.raw = skb->data;
  27.169 +        skb->data += ETH_HLEN;
  27.170 +        skb->nh.raw = skb->data;
  27.171 +        
  27.172  	/* The code is rearranged so that the path is the most
  27.173  	   short when CPU is congested, but is still operating.
  27.174  	 */
  27.175  	queue = &softnet_data[this_cpu];
  27.176 -
  27.177 -	local_irq_save(flags);
  27.178          
  27.179  	netdev_rx_stat[this_cpu].total++;
  27.180  
  27.181          if ( skb->src_vif == VIF_UNKNOWN_INTERFACE )
  27.182              skb->src_vif = VIF_PHYSICAL_INTERFACE;
  27.183 -
  27.184 +                
  27.185          if ( skb->dst_vif == VIF_UNKNOWN_INTERFACE )
  27.186 -            net_get_target_vif(skb);
  27.187 +            skb->dst_vif = __net_get_target_vif(skb->mac.raw, skb->len, skb->src_vif);
  27.188 +//if (skb->dst_vif == VIF_DROP)
  27.189 +//printk("netif_rx target: %d (sec: %u)\n", skb->dst_vif, skb->security);
  27.190          
  27.191          if ( (vif = sys_vif_list[skb->dst_vif]) == NULL )
  27.192          {
  27.193 +//printk("No such vif! (%d).\n", skb->dst_vif);
  27.194              // the target vif does not exist.
  27.195              goto drop;
  27.196          }
  27.197 @@ -733,7 +844,7 @@ int netif_rx(struct sk_buff *skb)
  27.198              do {
  27.199                  if ( p->domain != vif->domain ) continue;
  27.200                  if ( vif->skb_list.qlen > 100 ) break;
  27.201 -                skb_queue_tail(&vif->skb_list, skb);
  27.202 +                deliver_packet(skb, vif);
  27.203                  cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX);
  27.204                  read_unlock(&tasklist_lock);
  27.205                  goto found;
  27.206 @@ -745,20 +856,21 @@ int netif_rx(struct sk_buff *skb)
  27.207  
  27.208  drop:
  27.209  	netdev_rx_stat[this_cpu].dropped++;
  27.210 -	local_irq_restore(flags);
  27.211 -
  27.212 +        unmap_domain_mem(skb->head);
  27.213  	kfree_skb(skb);
  27.214 +        local_irq_restore(flags);
  27.215  	return NET_RX_DROP;
  27.216  
  27.217  found:
  27.218 +        unmap_domain_mem(skb->head);
  27.219 +        skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
  27.220 +        kfree_skb(skb);
  27.221          hyp_event_notify(cpu_mask);
  27.222          local_irq_restore(flags);
  27.223          return 0;
  27.224  }
  27.225  
  27.226 -/* Deliver skb to an old protocol, which is not threaded well
  27.227 -   or which do not understand shared skbs.
  27.228 - */
  27.229 +
  27.230  static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
  27.231  {
  27.232  	static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
  27.233 @@ -873,15 +985,49 @@ static inline void handle_diverter(struc
  27.234  }
  27.235  #endif   /* CONFIG_NET_DIVERT */
  27.236  
  27.237 +void update_shared_ring(void)
  27.238 +{
  27.239 +    rx_shadow_entry_t *rx;
  27.240 +    shared_info_t *s = current->shared_info;
  27.241 +    net_ring_t *net_ring;
  27.242 +    net_shadow_ring_t *shadow_ring;
  27.243 +    unsigned int nvif;
  27.244 +    
  27.245 +    clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
  27.246 +    for (nvif = 0; nvif < current->num_net_vifs; nvif++)
  27.247 +    {
  27.248 +        net_ring = current->net_vif_list[nvif]->net_ring;
  27.249 +        shadow_ring = current->net_vif_list[nvif]->shadow_ring;
  27.250 +        while ((shadow_ring->rx_idx != shadow_ring->rx_cons) 
  27.251 +                && (net_ring->rx_cons != net_ring->rx_prod))
  27.252 +        {
  27.253 +            rx = shadow_ring->rx_ring+shadow_ring->rx_idx;
  27.254 +            copy_to_user(net_ring->rx_ring + net_ring->rx_cons, rx, sizeof(rx_entry_t));
  27.255  
  27.256 +            shadow_ring->rx_idx = RX_RING_INC(shadow_ring->rx_idx);
  27.257 +            net_ring->rx_cons   = RX_RING_INC(net_ring->rx_cons);
  27.258 +            
  27.259 +            if (rx->flush_count == tlb_flush_count[smp_processor_id()])
  27.260 +                __flush_tlb();
  27.261 +
  27.262 +            if ( net_ring->rx_cons == net_ring->rx_event )
  27.263 +                set_bit(_EVENT_NET_RX, &s->events);
  27.264 +            
  27.265 +        }
  27.266 +    }
  27.267 +}
  27.268 +            
  27.269  void flush_rx_queue(void)
  27.270  {
  27.271      struct sk_buff *skb;
  27.272      shared_info_t *s = current->shared_info;
  27.273      net_ring_t *net_ring;
  27.274 +    net_shadow_ring_t *shadow_ring;
  27.275      unsigned int i, nvif;
  27.276 -    rx_entry_t rx;
  27.277 -
  27.278 +    rx_shadow_entry_t *rx;
  27.279 +    unsigned long *g_pte, tmp;
  27.280 +    struct pfn_info *g_pfn, *h_pfn;
  27.281 +    
  27.282      /* I have changed this to batch flush all vifs for a guest
  27.283       * at once, whenever this is called.  Since the guest is about to be
  27.284       * scheduled and issued an RX interrupt for one nic, it might as well
  27.285 @@ -893,15 +1039,17 @@ void flush_rx_queue(void)
  27.286       * loop can be replaced with a translation to the specific NET 
  27.287       * interrupt to serve. --akw
  27.288       */
  27.289 -    
  27.290      clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
  27.291  
  27.292      for (nvif = 0; nvif < current->num_net_vifs; nvif++)
  27.293      {
  27.294          net_ring = current->net_vif_list[nvif]->net_ring;
  27.295 +        shadow_ring = current->net_vif_list[nvif]->shadow_ring;
  27.296          while ( (skb = skb_dequeue(&current->net_vif_list[nvif]->skb_list)) 
  27.297                          != NULL )
  27.298          {
  27.299 +            //temporary hack to stop processing non-zc skbs.
  27.300 +            if (skb->skb_type == SKB_NORMAL) continue;
  27.301              /*
  27.302               * Write the virtual MAC address into the destination field
  27.303               * of the ethernet packet. Furthermore, do the same for ARP
  27.304 @@ -912,6 +1060,16 @@ void flush_rx_queue(void)
  27.305               * second sixteen bits, which are the per-host vif id.
  27.306               * (so eth0 should be 00-00-..., eth1 is 00-01-...)
  27.307               */
  27.308 +            
  27.309 +            if (skb->skb_type == SKB_ZERO_COPY)
  27.310 +            {
  27.311 +                skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
  27.312 +                skb->data = skb->head;
  27.313 +                skb_reserve(skb,16); 
  27.314 +                skb->mac.raw = skb->data;
  27.315 +                skb->data += ETH_HLEN;
  27.316 +            }
  27.317 +            
  27.318              memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
  27.319              *(unsigned int *)(skb->mac.ethernet->h_dest + 1) = nvif;
  27.320              if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
  27.321 @@ -920,19 +1078,88 @@ void flush_rx_queue(void)
  27.322                  *(unsigned int *)(skb->nh.raw + 18 + 1) = nvif;
  27.323              }
  27.324  
  27.325 +            if (skb->skb_type == SKB_ZERO_COPY)
  27.326 +            {
  27.327 +                unmap_domain_mem(skb->head);
  27.328 +            }
  27.329 +
  27.330              i = net_ring->rx_cons;
  27.331              if ( i != net_ring->rx_prod )
  27.332              {
  27.333 -                if ( !copy_from_user(&rx, net_ring->rx_ring+i, sizeof(rx)) )
  27.334 +                net_ring->rx_ring[i].status = shadow_ring->rx_ring[i].status;
  27.335 +                if ( shadow_ring->rx_ring[i].status == RING_STATUS_OK)
  27.336                  {
  27.337 -                    if ( (skb->len + ETH_HLEN) < rx.size )
  27.338 -                        rx.size = skb->len + ETH_HLEN;
  27.339 -                    copy_to_user((void *)rx.addr, skb->mac.raw, rx.size);
  27.340 -                    copy_to_user(net_ring->rx_ring+i, &rx, sizeof(rx));
  27.341 +                    rx = shadow_ring->rx_ring+i;
  27.342 +                    if ( (skb->len + ETH_HLEN) < rx->size )
  27.343 +                        rx->size = skb->len + ETH_HLEN;
  27.344 +
  27.345 +                    /* remap the packet again.  This is very temporary and will shortly be
  27.346 +                     * replaced with a page swizzle.
  27.347 +                     */
  27.348 +
  27.349 +                    /*if (skb->skb_type == SKB_ZERO_COPY)
  27.350 +                    {
  27.351 +                        skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
  27.352 +                        skb->data = skb->head;
  27.353 +                        skb_reserve(skb,16); 
  27.354 +                        skb->mac.raw = skb->data;
  27.355 +                        skb->data += ETH_HLEN;
  27.356 +                    }
  27.357 +                                                                        
  27.358 +                    copy_to_user((void *)rx->addr, skb->mac.raw, rx->size);
  27.359 +                    copy_to_user(net_ring->rx_ring+i, rx, sizeof(rx));
  27.360 +                    
  27.361 +                    if (skb->skb_type == SKB_ZERO_COPY)
  27.362 +                    {
  27.363 +                        unmap_domain_mem(skb->head);
  27.364 +                        skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
  27.365 +                    }*/
  27.366 +
  27.367 +                    //presumably I don't need to rewalk the guest page table
  27.368 +                    //here.
  27.369 +                    if (skb->skb_type == SKB_ZERO_COPY) 
  27.370 +                    {
  27.371 +                        // g_pfn is the frame FROM the guest being given up
  27.372 +                        // h_pfn is the frame FROM the hypervisor, passing up.
  27.373 +                        
  27.374 +                        if (rx->flush_count == tlb_flush_count[smp_processor_id()])
  27.375 +                        {
  27.376 +                            flush_tlb_all();
  27.377 +                        }
  27.378 +                        
  27.379 +                        g_pte = map_domain_mem(rx->addr);
  27.380 +                        
  27.381 +                        //g_pfn = frame_table + (rx->addr >> PAGE_SHIFT);
  27.382 +                        g_pfn =  frame_table + (*g_pte >> PAGE_SHIFT);
  27.383 +                        h_pfn = skb->pf;
  27.384 +
  27.385 +
  27.386 +                        //tmp = g_pfn->next; g_pfn->next = h_pfn->next; h_pfn->next = tmp;
  27.387 +                        //tmp = g_pfn->prev; g_pfn->prev = h_pfn->prev; h_pfn->prev = tmp;
  27.388 +                        tmp = g_pfn->flags; g_pfn->flags = h_pfn->flags; h_pfn->flags = tmp;
  27.389 +                        
  27.390 +                        h_pfn->tot_count = 1;
  27.391 +                        h_pfn->type_count = g_pfn->type_count;
  27.392 +                        g_pfn->tot_count = g_pfn->type_count = 0;
  27.393 +                        
  27.394 +                        h_pfn->flags = current->domain | PGT_l1_page_table;
  27.395 +                        g_pfn->flags = PGT_l1_page_table;
  27.396 +
  27.397 +
  27.398 +                        *g_pte = (*g_pte & ~PAGE_MASK) | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
  27.399 +
  27.400 +                        *g_pte |= _PAGE_PRESENT;
  27.401 +                        unmap_domain_mem(g_pte);
  27.402 +
  27.403 +                        skb->pf = g_pfn; // return the guest pfn to be put on the free list
  27.404 +                    } else {
  27.405 +                        BUG(); // got a non-zero-copy skb, which is not good.
  27.406 +                    }
  27.407 +                    
  27.408                  }
  27.409                  net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1);
  27.410                  if ( net_ring->rx_cons == net_ring->rx_event )
  27.411 -                    set_bit(_EVENT_NET_RX_FOR_VIF(nvif), &s->events);
  27.412 +                    set_bit(_EVENT_NET_RX, &s->events);
  27.413              }
  27.414              kfree_skb(skb);
  27.415          }
  27.416 @@ -1909,96 +2136,274 @@ int __init net_dev_init(void)
  27.417  	return 0;
  27.418  }
  27.419  
  27.420 +inline int init_tx_header(u8 *data, unsigned int len, struct net_device *dev)
  27.421 +{
  27.422 +        memcpy(data + ETH_ALEN, dev->dev_addr, ETH_ALEN);
  27.423 +        
  27.424 +        switch ( ntohs(*(unsigned short *)(data + 12)) )
  27.425 +        {
  27.426 +        case ETH_P_ARP:
  27.427 +            if ( len < 42 ) break;
  27.428 +            memcpy(data + 22, dev->dev_addr, 6);
  27.429 +            return ETH_P_ARP;
  27.430 +        case ETH_P_IP:
  27.431 +            return ETH_P_IP;
  27.432 +        }
  27.433 +        return 0;
  27.434 +}
  27.435  
  27.436 +/* 
  27.437 + * tx_skb_release
  27.438 + *
  27.439 + * skb destructor function that is attached to zero-copy tx skbs before 
  27.440 + * they are passed to the device driver for transmission.  The destructor 
  27.441 + * is responsible for unlinking the fragment pointer to the skb data that 
  27.442 + * is in guest memory, and decrementing the tot_count on the packet pages 
  27.443 + * pfn_info.
  27.444 + */
  27.445 +
  27.446 +void tx_skb_release(struct sk_buff *skb)
  27.447 +{
  27.448 +    int i;
  27.449 +    
  27.450 +    for (i= 0; i < skb_shinfo(skb)->nr_frags; i++)
  27.451 +        skb_shinfo(skb)->frags[i].page->tot_count--;
  27.452 +    
  27.453 +    skb_shinfo(skb)->nr_frags = 0; 
  27.454 +}
  27.455 +    
  27.456  /*
  27.457   * do_net_update:
  27.458   * 
  27.459   * Called from guest OS to notify updates to its transmit and/or receive
  27.460   * descriptor rings.
  27.461   */
  27.462 -#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
  27.463 -#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
  27.464 -#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
  27.465 -#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
  27.466 +#define PKT_PROT_LEN (ETH_HLEN + 8)
  27.467 +
  27.468 +void print_range2(u8 *start, unsigned int len)
  27.469 +{
  27.470 +    int i=0;
  27.471 +    while (i++ < len)
  27.472 +    {
  27.473 +        printk("%x:",start[i]);
  27.474 +    }
  27.475 +    printk("\n");
  27.476 +}
  27.477 +
  27.478  long do_net_update(void)
  27.479  {
  27.480      shared_info_t *shared = current->shared_info;    
  27.481 -    net_ring_t *net_ring = current->net_ring_base;
  27.482 +    net_ring_t *net_ring;
  27.483 +    net_shadow_ring_t *shadow_ring;
  27.484      net_vif_t *current_vif;
  27.485      unsigned int i, j;
  27.486      struct sk_buff *skb;
  27.487      tx_entry_t tx;
  27.488 -
  27.489 +    rx_shadow_entry_t *rx;
  27.490 +    unsigned long pfn;
  27.491 +    struct pfn_info *page;
  27.492 +    unsigned long *g_pte;
  27.493 +    
  27.494 +    
  27.495      for ( j = 0; j < current->num_net_vifs; j++)
  27.496      {
  27.497          current_vif = current->net_vif_list[j];
  27.498          net_ring = current_vif->net_ring;
  27.499 +        int target;
  27.500 +        u8 *g_data;
  27.501 +        unsigned short protocol;
  27.502 +
  27.503 +        /* First, we send out pending TX descriptors if they exist on this ring.
  27.504 +         */
  27.505 +        
  27.506          for ( i = net_ring->tx_cons; i != net_ring->tx_prod; i = TX_RING_INC(i) )
  27.507          {
  27.508              if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) )
  27.509                  continue;
  27.510  
  27.511 -            if ( TX_RING_INC(i) == net_ring->tx_event )
  27.512 -                set_bit(_EVENT_NET_TX_FOR_VIF(j), &shared->events);
  27.513 -
  27.514 -            skb = alloc_skb(tx.size, GFP_KERNEL);
  27.515 -            if ( skb == NULL ) continue;
  27.516 -            skb_put(skb, tx.size);
  27.517 -            if ( copy_from_user(skb->data, (void *)tx.addr, tx.size) )
  27.518 +            if ( tx.size < PKT_PROT_LEN ) continue; // This should be reasonable.
  27.519 +            
  27.520 +            // Packets must not cross page boundaries.  For now, this is a 
  27.521 +            // kernel panic, later it may become a continue -- silent fail.
  27.522 +            
  27.523 +            if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE ) 
  27.524              {
  27.525 -                kfree_skb(skb);
  27.526 +                DPRINTK("tx.addr: %lx, size: %lu, end: %lu\n", tx.addr, tx.size,
  27.527 +                    (tx.addr &~PAGE_MASK) + tx.size);
  27.528                  continue;
  27.529 +                //BUG();
  27.530              }
  27.531 -            skb->dev = the_dev;
  27.532 +            
  27.533 +            if ( TX_RING_INC(i) == net_ring->tx_event )
  27.534 +                set_bit(_EVENT_NET_TX, &shared->events);
  27.535  
  27.536 -            if ( skb->len < 16 )
  27.537 +            /* Map the skb in from the guest, and get its delivery target.
  27.538 +             * We need this to know whether the packet is to be sent locally
  27.539 +             * or remotely.
  27.540 +             */
  27.541 +            
  27.542 +            g_data = map_domain_mem(tx.addr);
  27.543 +
  27.544 +//print_range2(g_data, PKT_PROT_LEN);                
  27.545 +            protocol = __constant_htons(init_tx_header(g_data, tx.size, the_dev));
  27.546 +            if ( protocol == 0 )
  27.547              {
  27.548 -                kfree_skb(skb);
  27.549 +                unmap_domain_mem(g_data);
  27.550                  continue;
  27.551              }
  27.552  
  27.553 -            memcpy(skb->data + ETH_ALEN, skb->dev->dev_addr, ETH_ALEN);
  27.554 -        
  27.555 -            switch ( ntohs(*(unsigned short *)(skb->data + 12)) )
  27.556 +            target = __net_get_target_vif(g_data, tx.size, current_vif->id);
  27.557 +//printk("Send to target: %d\n", target); 
  27.558 +            if (target > VIF_PHYSICAL_INTERFACE )
  27.559              {
  27.560 -            case ETH_P_ARP:
  27.561 -                skb->protocol = __constant_htons(ETH_P_ARP);
  27.562 -                if ( skb->len < 42 ) break;
  27.563 -                memcpy(skb->data + 22, skb->dev->dev_addr, 6);
  27.564 -                break;
  27.565 -            case ETH_P_IP:
  27.566 -                skb->protocol = __constant_htons(ETH_P_IP);
  27.567 -                break;
  27.568 -            default:
  27.569 -                kfree_skb(skb);
  27.570 -                skb = NULL;
  27.571 -                break;
  27.572 -            }
  27.573 +                // Local delivery: Allocate an skb off the domain free list,
  27.574 +                // fill it, and pass it to netif_rx as if it came off the NIC.
  27.575 +//printk("LOCAL! (%d) \n", target);
  27.576 +                skb = dev_alloc_skb(tx.size);
  27.577 +                if (skb == NULL) 
  27.578 +                {
  27.579 +                    unmap_domain_mem(g_data);
  27.580 +                    continue;
  27.581 +                }
  27.582 +                
  27.583 +                skb->src_vif = current_vif->id;
  27.584 +                skb->dst_vif = target;
  27.585 +                skb->protocol = protocol;
  27.586 +
  27.587 +                skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
  27.588 +                skb->data = skb->head + 16;
  27.589 +                skb_reserve(skb,2);
  27.590 +                memcpy(skb->data, g_data, tx.size);
  27.591 +                skb->len = tx.size;
  27.592 +                unmap_domain_mem(skb->head);
  27.593 +                skb->data += ETH_HLEN; // so the assertion in netif_RX doesn't freak out.
  27.594 +                
  27.595 +                (void)netif_rx(skb);
  27.596  
  27.597 -            if ( skb != NULL )
  27.598 +                unmap_domain_mem(g_data);
  27.599 +            }
  27.600 +            else if ( target == VIF_PHYSICAL_INTERFACE )
  27.601              {
  27.602 -                skb->protocol = eth_type_trans(skb, skb->dev);
  27.603 -                skb->src_vif = current_vif->id; 
  27.604 -                net_get_target_vif(skb);
  27.605 -                if ( skb->dst_vif > VIF_PHYSICAL_INTERFACE )
  27.606 +                // External delivery: Allocate a small skb to hold protected header info
  27.607 +                // and copy the eth header and IP address fields into that.
  27.608 +                // Set a frag link to the remaining data, and we will scatter-gather
  27.609 +                // in the device driver to send the two bits later.
  27.610 +                
  27.611 +                /*unmap_domain_mem(g_data);*/
  27.612 +                    
  27.613 +                skb = alloc_skb(PKT_PROT_LEN, GFP_KERNEL); // Eth header + two IP addrs.
  27.614 +                if (skb == NULL) 
  27.615                  {
  27.616 -                    (void)netif_rx(skb);
  27.617 +printk("Alloc skb failed!\n");
  27.618 +                    continue;
  27.619 +                }
  27.620 +            
  27.621 +                skb_put(skb, PKT_PROT_LEN);
  27.622 +                /*if ( copy_from_user(skb->data, (void *)tx.addr, PKT_PROT_LEN) )
  27.623 +                {
  27.624 +printk("Copy from user failed!\n");
  27.625 +                    kfree_skb(skb);
  27.626 +                    continue;
  27.627                  }
  27.628 -                else if ( skb->dst_vif == VIF_PHYSICAL_INTERFACE )
  27.629 -                {
  27.630 -                    skb_push(skb, skb->dev->hard_header_len);
  27.631 -                    dev_queue_xmit(skb);
  27.632 -                } 
  27.633 -                else
  27.634 -                {
  27.635 -                    kfree_skb(skb);
  27.636 -                }
  27.637 +                */
  27.638 +                memcpy(skb->data, g_data, PKT_PROT_LEN);
  27.639 +                unmap_domain_mem(g_data);
  27.640 +//print_range2(g_data, PKT_PROT_LEN);                
  27.641 +                skb->dev = the_dev;
  27.642 +                skb->src_vif = current_vif->id;
  27.643 +                skb->dst_vif = target;
  27.644 +                skb->protocol = protocol; // These next two lines abbreviate the call 
  27.645 +                                          // to eth_type_trans as we already have our
  27.646 +                                          // protocol.
  27.647 +                //skb_pull(skb, skb->dev->hard_header_len);
  27.648 +                skb->mac.raw=skb->data; 
  27.649 +
  27.650 +                // set tot_count++ in the guest data pfn.
  27.651 +                page = (tx.addr >> PAGE_SHIFT) + frame_table;
  27.652 +                page->tot_count++;
  27.653  
  27.654 +                // place the remainder of the packet (which is in guest memory) into an
  27.655 +                // skb frag.
  27.656 +                skb_shinfo(skb)->frags[0].page = page;
  27.657 +                skb_shinfo(skb)->frags[0].size = tx.size - PKT_PROT_LEN;
  27.658 +                skb_shinfo(skb)->frags[0].page_offset 
  27.659 +                    = (tx.addr & ~PAGE_MASK) + PKT_PROT_LEN;
  27.660 +                skb_shinfo(skb)->nr_frags = 1;
  27.661 +                skb->data_len = tx.size - skb->len;
  27.662 +                skb->len = tx.size;
  27.663 +                
  27.664 +                // assign a destructor to the skb that will unlink and dec the tot_count
  27.665 +                skb->destructor = &tx_skb_release;
  27.666 +                //skb_push(skb, skb->dev->hard_header_len);
  27.667 +//printk("calling dev_queue_xmit!\n");
  27.668 +                dev_queue_xmit(skb);
  27.669 +            }
  27.670 +            else
  27.671 +            {
  27.672 +                unmap_domain_mem(g_data);
  27.673              }
  27.674          }
  27.675          net_ring->tx_cons = i;
  27.676 +
  27.677 +        /* Next, pull any new RX descriptors across to the shadow ring.
  27.678 +         */
  27.679 +    
  27.680 +        shadow_ring = current_vif->shadow_ring;
  27.681 +
  27.682 +        for (i = shadow_ring->rx_prod; i != net_ring->rx_prod; i = RX_RING_INC(i))
  27.683 +        {
  27.684 +            /* This copy assumes that rx_shadow_entry_t is an extension of 
  27.685 +             * rx_net_entry_t; extra fields must be tacked on to the end.
  27.686 +             */
  27.687 +            if ( copy_from_user( shadow_ring->rx_ring+i, net_ring->rx_ring+i, 
  27.688 +                                 sizeof (rx_entry_t) ) )
  27.689 +            {
  27.690 +                shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU;
  27.691 +                continue;
  27.692 +            } else {
  27.693 +                    
  27.694 +                rx = shadow_ring->rx_ring + i;
  27.695 +                pfn = rx->addr >> PAGE_SHIFT;
  27.696 +                page = frame_table + pfn;
  27.697 +                
  27.698 +                shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
  27.699 +
  27.700 +                if  ( page->flags != (PGT_l1_page_table | current->domain) ) 
  27.701 +                {
  27.702 +BUG();
  27.703 +                       continue;
  27.704 +                }
  27.705 +
  27.706 +
  27.707 +                g_pte = map_domain_mem(rx->addr);
  27.708 +
  27.709 +                if (!(*g_pte & _PAGE_PRESENT))
  27.710 +                {
  27.711 +BUG();
  27.712 +                        unmap_domain_mem(g_pte);
  27.713 +                        continue;
  27.714 +                }
  27.715 +                
  27.716 +                page = (*g_pte >> PAGE_SHIFT) + frame_table;
  27.717 +                
  27.718 +                if (page->tot_count != 1) 
  27.719 +                {
  27.720 +printk("!\n");
  27.721 +                        unmap_domain_mem(g_pte);
  27.722 +                        continue;
  27.723 +                }
  27.724 +                
  27.725 +                // The pte they passed was good, so we take it away from them.
  27.726 +                shadow_ring->rx_ring[i].status = RING_STATUS_OK;
  27.727 +                *g_pte &= ~_PAGE_PRESENT;
  27.728 +                page->flags = (page->flags & ~PG_type_mask) | PGT_net_rx_buf;
  27.729 +                rx->flush_count = tlb_flush_count[smp_processor_id()];
  27.730 +
  27.731 +                unmap_domain_mem(g_pte);
  27.732 +            }
  27.733 +        }
  27.734 +        shadow_ring->rx_prod = net_ring->rx_prod;
  27.735      }
  27.736 -
  27.737      return 0;
  27.738  }
  27.739  
    28.1 --- a/xen-2.4.16/net/eth.c	Fri Feb 14 13:18:19 2003 +0000
    28.2 +++ b/xen-2.4.16/net/eth.c	Fri Feb 14 14:27:45 2003 +0000
    28.3 @@ -161,52 +161,62 @@ unsigned short eth_type_trans(struct sk_
    28.4  	struct ethhdr *eth;
    28.5  	unsigned char *rawp;
    28.6  	
    28.7 -	skb->mac.raw=skb->data;
    28.8 -	skb_pull(skb,dev->hard_header_len);
    28.9 -	eth= skb->mac.ethernet;
   28.10 +        if (skb->skb_type == SKB_ZERO_COPY)
   28.11 +        {
   28.12 +            skb_pull(skb,dev->hard_header_len);
   28.13 +            skb->mac.raw= (void *)0xdeadbeef;
   28.14 +            return htons(ETH_P_802_2);
   28.15 +            
   28.16 +        } else { // SKB_NORMAL
   28.17 +        
   28.18 +	    skb->mac.raw=skb->data;
   28.19 +	    skb_pull(skb,dev->hard_header_len);
   28.20 +	    eth= skb->mac.ethernet;
   28.21  	
   28.22 -	if(*eth->h_dest&1)
   28.23 -	{
   28.24 -		if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
   28.25 +	    if(*eth->h_dest&1)
   28.26 +	    {
   28.27 +	    	if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
   28.28  			skb->pkt_type=PACKET_BROADCAST;
   28.29  		else
   28.30  			skb->pkt_type=PACKET_MULTICAST;
   28.31 -	}
   28.32 +	    }
   28.33  	
   28.34 -	/*
   28.35 -	 *	This ALLMULTI check should be redundant by 1.4
   28.36 -	 *	so don't forget to remove it.
   28.37 -	 *
   28.38 -	 *	Seems, you forgot to remove it. All silly devices
   28.39 -	 *	seems to set IFF_PROMISC.
   28.40 -	 */
   28.41 +	    /*
   28.42 +	    *	This ALLMULTI check should be redundant by 1.4
   28.43 +	    *	so don't forget to remove it.
   28.44 +	    *
   28.45 +	    *	Seems, you forgot to remove it. All silly devices
   28.46 +	    *	seems to set IFF_PROMISC.
   28.47 +	    */
   28.48  	 
   28.49 -	else if(1 /*dev->flags&IFF_PROMISC*/)
   28.50 -	{
   28.51 +	    else if(1 /*dev->flags&IFF_PROMISC*/)
   28.52 +	    {
   28.53  		if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))
   28.54  			skb->pkt_type=PACKET_OTHERHOST;
   28.55 -	}
   28.56 +	    }
   28.57  	
   28.58 -	if (ntohs(eth->h_proto) >= 1536)
   28.59 +	    if (ntohs(eth->h_proto) >= 1536)
   28.60  		return eth->h_proto;
   28.61  		
   28.62 -	rawp = skb->data;
   28.63 +	    rawp = skb->data;
   28.64  	
   28.65 -	/*
   28.66 -	 *	This is a magic hack to spot IPX packets. Older Novell breaks
   28.67 -	 *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
   28.68 -	 *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
   28.69 -	 *	won't work for fault tolerant netware but does for the rest.
   28.70 -	 */
   28.71 -	if (*(unsigned short *)rawp == 0xFFFF)
   28.72 +	    /*
   28.73 +	    *	This is a magic hack to spot IPX packets. Older Novell breaks
   28.74 +	    *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
   28.75 +	    *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
   28.76 +	    *	won't work for fault tolerant netware but does for the rest.
   28.77 +	    */
   28.78 +	    if (*(unsigned short *)rawp == 0xFFFF)
   28.79  		return htons(ETH_P_802_3);
   28.80  		
   28.81 -	/*
   28.82 -	 *	Real 802.2 LLC
   28.83 -	 */
   28.84 -	return htons(ETH_P_802_2);
   28.85 +	    /*
   28.86 +	    *	Real 802.2 LLC
   28.87 +	    */
   28.88 +	    return htons(ETH_P_802_2);
   28.89 +        }
   28.90  }
   28.91  
   28.92 +
   28.93  int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
   28.94  {
   28.95  	struct ethhdr *eth = skb->mac.ethernet;
    29.1 --- a/xen-2.4.16/net/skbuff.c	Fri Feb 14 13:18:19 2003 +0000
    29.2 +++ b/xen-2.4.16/net/skbuff.c	Fri Feb 14 14:27:45 2003 +0000
    29.3 @@ -63,6 +63,7 @@
    29.4  
    29.5  #include <asm/uaccess.h>
    29.6  #include <asm/system.h>
    29.7 +#include <asm/io.h>
    29.8  
    29.9  #define BUG_TRAP ASSERT
   29.10  
   29.11 @@ -149,6 +150,114 @@ static __inline__ void skb_head_to_pool(
   29.12  	kmem_cache_free(skbuff_head_cache, skb);
   29.13  }
   29.14  
   29.15 +//static unsigned long skbpagesout=0, skbpagesin=0;
   29.16 +
   29.17 +static inline u8 *alloc_skb_data_page(struct sk_buff *skb)
   29.18 +{
   29.19 +        struct list_head *list_ptr;
   29.20 +        struct pfn_info  *pf;
   29.21 +        unsigned long flags;
   29.22 +        
   29.23 +        spin_lock_irqsave(&free_list_lock, flags);
   29.24 +
   29.25 +        if (!free_pfns) return NULL;
   29.26 +
   29.27 +        list_ptr = free_list.next;
   29.28 +        pf = list_entry(list_ptr, struct pfn_info, list);
   29.29 +        pf->flags = 0; // owned by dom0
   29.30 +        list_del(&pf->list);
   29.31 +        //pf->next = pf->prev = (pf - frame_table);
   29.32 +        free_pfns--;
   29.33 +
   29.34 +        spin_unlock_irqrestore(&free_list_lock, flags);
   29.35 +
   29.36 +        skb->pf = pf;
   29.37 +//if (skbpagesout++ % 100 == 0) printk("XEN-: skb allocs: %lu\n", skbpagesout);
   29.38 +        return (u8 *)((pf - frame_table) << PAGE_SHIFT);
   29.39 +}
   29.40 +
   29.41 +static inline void dealloc_skb_data_page(struct sk_buff *skb)
   29.42 +{
   29.43 +        struct pfn_info  *pf;
   29.44 +        unsigned long flags;
   29.45 +
   29.46 +        pf = skb->pf;
   29.47 +
   29.48 +        spin_lock_irqsave(&free_list_lock, flags);
   29.49 +        
   29.50 +        list_add(&pf->list, &free_list);
   29.51 +        free_pfns++;
   29.52 +
   29.53 +        spin_unlock_irqrestore(&free_list_lock, flags);
   29.54 +
   29.55 +//if (skbpagesin++ % 100 == 0) printk("XEN-: skb allocs: %lu\n", skbpagesin);
   29.56 +}
   29.57 +
   29.58 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
   29.59 +{
   29.60 +        struct sk_buff *skb;
   29.61 +        u8 *data;
   29.62 +
   29.63 +        if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
   29.64 +                static int count = 0;
   29.65 +                if (++count < 5) {
   29.66 +                        printk(KERN_ERR "alloc_skb called nonatomically "
   29.67 +                               "from interrupt %p\n", NET_CALLER(size));
   29.68 +                        BUG();
   29.69 +                }
   29.70 +                gfp_mask &= ~__GFP_WAIT;
   29.71 +        }
   29.72 +
   29.73 +        /* Get the HEAD */
   29.74 +        skb = skb_head_from_pool();
   29.75 +        if (skb == NULL) {
   29.76 +                skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
   29.77 +                if (skb == NULL)
   29.78 +                        goto nohead;
   29.79 +        }
   29.80 +
   29.81 +        /* Get the DATA. Size must match skb_add_mtu(). */
   29.82 +        size = SKB_DATA_ALIGN(size);
   29.83 +        data = alloc_skb_data_page(skb);
   29.84 +
   29.85 +        if (data == NULL)
   29.86 +                goto nodata;
   29.87 +
   29.88 +        // This is so that pci_map_single does the right thing in the driver.
   29.89 +        // If you want to ever use this pointer otherwise, you need to regenerate it 
   29.90 +        // based on skb->pf.
   29.91 +        data = phys_to_virt((unsigned long)data); 
   29.92 +        
   29.93 +        /* XXX: does not include slab overhead */
   29.94 +        skb->truesize = size + sizeof(struct sk_buff);
   29.95 +
   29.96 +        /* Load the data pointers. */
   29.97 +        skb->head = data;
   29.98 +        skb->data = data;
   29.99 +        skb->tail = data;
  29.100 +        skb->end = data + size;
  29.101 +
  29.102 +        /* Set up other state */
  29.103 +        skb->len = 0;
  29.104 +        skb->cloned = 0;
  29.105 +        skb->data_len = 0;
  29.106 +        skb->src_vif = VIF_UNKNOWN_INTERFACE;
  29.107 +        skb->dst_vif = VIF_UNKNOWN_INTERFACE;
  29.108 +        skb->skb_type = SKB_ZERO_COPY;
  29.109 +
  29.110 +        atomic_set(&skb->users, 1);
  29.111 +        atomic_set(&(skb_shinfo(skb)->dataref), 1);
  29.112 +        skb_shinfo(skb)->nr_frags = 0;
  29.113 +        skb_shinfo(skb)->frag_list = NULL;
  29.114 +
  29.115 +        return skb;
  29.116 +
  29.117 +nodata:
  29.118 +        skb_head_to_pool(skb);
  29.119 +nohead:
  29.120 +        return NULL;
  29.121 +}
  29.122 +
  29.123  
  29.124  /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  29.125   *	'private' fields and also do memory statistics to find all the
  29.126 @@ -213,6 +322,7 @@ struct sk_buff *alloc_skb(unsigned int s
  29.127  	skb->data_len = 0;
  29.128          skb->src_vif = VIF_UNKNOWN_INTERFACE;
  29.129          skb->dst_vif = VIF_UNKNOWN_INTERFACE;
  29.130 +        skb->skb_type = SKB_NORMAL;
  29.131  
  29.132  	atomic_set(&skb->users, 1); 
  29.133  	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  29.134 @@ -284,6 +394,7 @@ static void skb_clone_fraglist(struct sk
  29.135  
  29.136  static void skb_release_data(struct sk_buff *skb)
  29.137  {
  29.138 +
  29.139  	if (!skb->cloned ||
  29.140  	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
  29.141  		if (skb_shinfo(skb)->nr_frags) {
  29.142 @@ -295,7 +406,12 @@ static void skb_release_data(struct sk_b
  29.143  		if (skb_shinfo(skb)->frag_list)
  29.144  			skb_drop_fraglist(skb);
  29.145  
  29.146 -		kfree(skb->head);
  29.147 +                if (skb->skb_type == SKB_NORMAL) {
  29.148 +		    kfree(skb->head);
  29.149 +                } else if (skb->skb_type == SKB_ZERO_COPY) {                    dealloc_skb_data_page(skb);
  29.150 +                } else {
  29.151 +                    BUG(); //skb_release_data called with unknown skb type!
  29.152 +                }
  29.153  	}
  29.154  }
  29.155  
  29.156 @@ -333,6 +449,7 @@ void __kfree_skb(struct sk_buff *skb)
  29.157  		}
  29.158  		skb->destructor(skb);
  29.159  	}
  29.160 +
  29.161  #ifdef CONFIG_NETFILTER
  29.162  	nf_conntrack_put(skb->nfct);
  29.163  #endif
    30.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c	Fri Feb 14 13:18:19 2003 +0000
    30.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c	Fri Feb 14 14:27:45 2003 +0000
    30.3 @@ -21,14 +21,12 @@
    30.4  #include <linux/skbuff.h>
    30.5  #include <linux/init.h>
    30.6  
    30.7 +#include <asm/io.h>
    30.8  #include <net/sock.h>
    30.9  
   30.10  #define NET_TX_IRQ _EVENT_NET_TX
   30.11  #define NET_RX_IRQ _EVENT_NET_RX
   30.12  
   30.13 -#define NET_TX_IRQ_FOR_VIF(x) _EVENT_NET_TX_FOR_VIF(x)
   30.14 -#define NET_RX_IRQ_FOR_VIF(x) _EVENT_NET_RX_FOR_VIF(x)
   30.15 -
   30.16  #define TX_MAX_ENTRIES (TX_RING_SIZE - 2)
   30.17  #define RX_MAX_ENTRIES (RX_RING_SIZE - 2)
   30.18  
   30.19 @@ -37,7 +35,7 @@
   30.20  #define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
   30.21  #define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
   30.22  
   30.23 -#define RX_BUF_SIZE 1600 /* Ethernet MTU + plenty of slack! */
   30.24 +#define RX_BUF_SIZE 2049 /* (was 1600) Ethernet MTU + plenty of slack! */
   30.25  
   30.26  static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs);
   30.27  static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs);
   30.28 @@ -48,6 +46,8 @@ static void cleanup_module(void);
   30.29  
   30.30  static struct list_head dev_list;
   30.31  
   30.32 +static unsigned int net_countx;
   30.33 +
   30.34  /*
   30.35   * RX RING:   RX_IDX <= rx_cons <= rx_prod
   30.36   * TX RING:   TX_IDX <= tx_cons <= tx_prod
   30.37 @@ -72,16 +72,8 @@ struct net_private
   30.38  static int network_open(struct net_device *dev)
   30.39  {
   30.40      struct net_private *np = dev->priv;
   30.41 -    int error;
   30.42 -    char *rxlabel, *txlabel;
   30.43 +    int error = 0;
   30.44  
   30.45 -    // This is inevitably not the right way to allocate a couple of static strings.
   30.46 -    rxlabel = kmalloc(sizeof("net-rx- "), GFP_KERNEL);
   30.47 -    txlabel = kmalloc(sizeof("net-tx- "), GFP_KERNEL);
   30.48 -    if ((rxlabel == NULL) || (txlabel == NULL)) goto fail;
   30.49 -    sprintf(rxlabel, "net-rx-%d", np->id);
   30.50 -    sprintf(txlabel, "net-tx-%d", np->id);
   30.51 -    
   30.52      np->rx_idx = np->tx_idx = np->tx_full = 0;
   30.53  
   30.54      memset(&np->stats, 0, sizeof(np->stats));
   30.55 @@ -113,8 +105,8 @@ static int network_open(struct net_devic
   30.56  
   30.57      network_alloc_rx_buffers(dev);
   30.58  
   30.59 -    error = request_irq(NET_RX_IRQ_FOR_VIF(np->id), network_rx_int, 0, 
   30.60 -                    rxlabel, dev);
   30.61 +    error = request_irq(NET_RX_IRQ, network_rx_int, 0, 
   30.62 +                    "net-rx", dev);
   30.63      if ( error )
   30.64      {
   30.65          printk(KERN_WARNING "%s: Could not allocate receive interrupt\n",
   30.66 @@ -122,13 +114,13 @@ static int network_open(struct net_devic
   30.67          goto fail;
   30.68      }
   30.69  
   30.70 -    error = request_irq(NET_TX_IRQ_FOR_VIF(np->id), network_tx_int, 0, 
   30.71 -                    txlabel, dev);
   30.72 +    error = request_irq(NET_TX_IRQ, network_tx_int, 0, 
   30.73 +                    "net-tx", dev);
   30.74      if ( error )
   30.75      {
   30.76          printk(KERN_WARNING "%s: Could not allocate transmit interrupt\n",
   30.77                 dev->name);
   30.78 -        free_irq(NET_RX_IRQ_FOR_VIF(np->id), dev);
   30.79 +        free_irq(NET_RX_IRQ, dev);
   30.80          goto fail;
   30.81      }
   30.82  
   30.83 @@ -141,8 +133,6 @@ static int network_open(struct net_devic
   30.84      return 0;
   30.85  
   30.86   fail:
   30.87 -    if ( rxlabel ) kfree(rxlabel);
   30.88 -    if ( txlabel ) kfree(txlabel);
   30.89      if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring);
   30.90      if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring);
   30.91      if ( np->rx_skb_ring ) kfree(np->rx_skb_ring);
   30.92 @@ -179,13 +169,59 @@ static void network_tx_buf_gc(struct net
   30.93      spin_unlock_irqrestore(&np->tx_lock, flags);
   30.94  }
   30.95  
   30.96 +inline unsigned long get_ppte(unsigned long addr)
   30.97 +{
   30.98 +        unsigned long ppte = 0xdeadbeef;
   30.99 +        pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
  30.100 +        pgd = pgd_offset_k(addr);
  30.101  
  30.102 +        if (pgd_none(*pgd) || pgd_bad(*pgd)) BUG();
  30.103 +        
  30.104 +        pmd = pmd_offset(pgd, addr);
  30.105 +        if (pmd_none(*pmd)) BUG(); 
  30.106 +        if (pmd_bad(*pmd)) BUG();
  30.107 +        
  30.108 +        ptep = pte_offset(pmd, addr);
  30.109 +        ppte = (unsigned long)phys_to_machine(virt_to_phys(ptep));
  30.110 +
  30.111 +        return ppte;
  30.112 +}
  30.113 +/*
  30.114 +static void validate_free_list(void)
  30.115 +{
  30.116 +    unsigned long addr, ppfn, mpfn, mpfn2, flags;
  30.117 +    struct list_head *i;
  30.118 +    struct net_page_info *np;
  30.119 +
  30.120 +    printk(KERN_ALERT "Walking free pages:\n");
  30.121 +   
  30.122 +    spin_lock_irqsave(&net_page_list_lock, flags);
  30.123 +    
  30.124 +    list_for_each(i, &net_page_list) 
  30.125 +    {
  30.126 +        np = list_entry(i, struct net_page_info, list);
  30.127 +        addr = np->virt_addr;
  30.128 +        ppfn = virt_to_phys(addr) >> PAGE_SHIFT;
  30.129 +        mpfn = get_ppte(addr);
  30.130 +        mpfn2 = phys_to_machine_mapping[ppfn];
  30.131 +
  30.132 +        mpfn = (*(unsigned long *)phys_to_virt(machine_to_phys(mpfn))) >> PAGE_SHIFT;
  30.133 +        if (mpfn != mpfn2) printk(KERN_ALERT "mpfn %lu != %lu\n", mpfn, mpfn2);
  30.134 +
  30.135 +        if (machine_to_phys_mapping[mpfn] != ppfn) printk(KERN_ALERT "ppfn %lu != %lu\n", machine_to_phys_mapping[mpfn], ppfn);
  30.136 +    }
  30.137 +
  30.138 +    spin_unlock_irqrestore(&net_page_list_lock, flags);
  30.139 +    
  30.140 +}
  30.141 +*/
  30.142  static void network_alloc_rx_buffers(struct net_device *dev)
  30.143  {
  30.144      unsigned int i;
  30.145      struct net_private *np = dev->priv;
  30.146      struct sk_buff *skb;
  30.147      unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES);
  30.148 +    
  30.149  
  30.150      for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) )
  30.151      {
  30.152 @@ -194,8 +230,9 @@ static void network_alloc_rx_buffers(str
  30.153          skb->dev = dev;
  30.154          skb_reserve(skb, 2); /* word align the IP header */
  30.155          np->rx_skb_ring[i] = skb;
  30.156 -        np->net_ring->rx_ring[i].addr = (unsigned long)skb->data;
  30.157 +        np->net_ring->rx_ring[i].addr = get_ppte(skb->head); 
  30.158          np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */
  30.159 +//printk(KERN_ALERT "[%p]\n", phys_to_machine(virt_to_phys(skb->page_ptr)));
  30.160      }
  30.161  
  30.162      np->net_ring->rx_prod = i;
  30.163 @@ -219,6 +256,14 @@ static void network_free_rx_buffers(stru
  30.164      }
  30.165  }
  30.166  
  30.167 +void print_range(u8 *start, unsigned int len)
  30.168 +{
  30.169 +    int i = 0;
  30.170 +
  30.171 +    while (i++ < len)
  30.172 +        printk("%x:", start[i]);
  30.173 +    printk("\n");
  30.174 +}
  30.175  
  30.176  static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
  30.177  {
  30.178 @@ -231,10 +276,23 @@ static int network_start_xmit(struct sk_
  30.179          netif_stop_queue(dev);
  30.180          return -ENOBUFS;
  30.181      }
  30.182 -
  30.183 +//print_range(skb->data, ETH_HLEN + 8);
  30.184 +//print_range(skb->data + ETH_HLEN + 8, 20);
  30.185 +//printk("skb->len is %u in guestOS (expected fraglen: %u).\n", skb->len, skb->len - (ETH_HLEN + 8));
  30.186      i = np->net_ring->tx_prod;
  30.187 +
  30.188 +    if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE )
  30.189 +    {
  30.190 +        struct sk_buff *new_skb = alloc_skb(RX_BUF_SIZE, GFP_KERNEL);
  30.191 +        skb_put(new_skb, skb->len);
  30.192 +        memcpy(new_skb->data, skb->data, skb->len);
  30.193 +        kfree_skb(skb);
  30.194 +        skb = new_skb;
  30.195 +    }   
  30.196 +    
  30.197      np->tx_skb_ring[i] = skb;
  30.198 -    np->net_ring->tx_ring[i].addr = (unsigned long)skb->data;
  30.199 +    np->net_ring->tx_ring[i].addr 
  30.200 +        = (unsigned long)phys_to_machine(virt_to_phys(skb->data));
  30.201      np->net_ring->tx_ring[i].size = skb->len;
  30.202      np->net_ring->tx_prod = TX_RING_INC(i);
  30.203      atomic_inc(&np->tx_entries);
  30.204 @@ -273,13 +331,30 @@ static void network_rx_int(int irq, void
  30.205      struct net_private *np = dev->priv;
  30.206      struct sk_buff *skb;
  30.207      
  30.208 +    /*if (net_countx++ % 100 == 0) validate_free_list();*/
  30.209 +    
  30.210   again:
  30.211      for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) )
  30.212      {
  30.213 +        if (np->net_ring->rx_ring[i].status != RING_STATUS_OK)
  30.214 +        {
  30.215 +                printk("bad buffer on RX ring!(%d)\n", 
  30.216 +                                np->net_ring->rx_ring[i].status);
  30.217 +                continue;
  30.218 +        }
  30.219          skb = np->rx_skb_ring[i];
  30.220 +
  30.221 +//printk(KERN_ALERT "[%u]: ptmm[%lx] old:(%lx) new:(%lx)\n", i , virt_to_phys(skb->head) >> PAGE_SHIFT, phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT], (*(unsigned long *)phys_to_virt(machine_to_phys(np->net_ring->rx_ring[i].addr))) >> PAGE_SHIFT);
  30.222 +
  30.223 +        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
  30.224 +            (*(unsigned long *)phys_to_virt(
  30.225 +                    machine_to_phys(np->net_ring->rx_ring[i].addr))
  30.226 +             ) >> PAGE_SHIFT;
  30.227 +
  30.228          skb_put(skb, np->net_ring->rx_ring[i].size);
  30.229          skb->protocol = eth_type_trans(skb, dev);
  30.230          np->stats.rx_packets++;
  30.231 +
  30.232          np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
  30.233          netif_rx(skb);
  30.234          dev->last_rx = jiffies;
  30.235 @@ -309,8 +384,8 @@ int network_close(struct net_device *dev
  30.236      struct net_private *np = dev->priv;
  30.237  
  30.238      netif_stop_queue(dev);
  30.239 -    free_irq(NET_RX_IRQ_FOR_VIF(np->id), dev);
  30.240 -    free_irq(NET_TX_IRQ_FOR_VIF(np->id), dev);
  30.241 +    free_irq(NET_RX_IRQ, dev);
  30.242 +    free_irq(NET_TX_IRQ, dev);
  30.243      network_free_rx_buffers(dev);
  30.244      kfree(np->net_ring->rx_ring);
  30.245      kfree(np->net_ring->tx_ring);
    31.1 --- a/xenolinux-2.4.16-sparse/include/asm-xeno/io.h	Fri Feb 14 13:18:19 2003 +0000
    31.2 +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/io.h	Fri Feb 14 14:27:45 2003 +0000
    31.3 @@ -2,7 +2,7 @@
    31.4  #define _ASM_IO_H
    31.5  
    31.6  #include <linux/config.h>
    31.7 -
    31.8 +#include <asm/hypervisor.h>
    31.9  /*
   31.10   * This file contains the definitions for the x86 IO instructions
   31.11   * inb/inw/inl/outb/outw/outl and the "string versions" of the same
   31.12 @@ -74,6 +74,11 @@ static inline void * phys_to_virt(unsign
   31.13  }
   31.14  
   31.15  /*
   31.16 + * Change virtual addresses to machine addresses and vv.
   31.17 + * These are equally trivial.
   31.18 + */
   31.19 +
   31.20 +/*
   31.21   * Change "struct page" to physical address.
   31.22   */
   31.23  #define page_to_phys(page)	((page - mem_map) << PAGE_SHIFT)
    32.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.2 +++ b/xenolinux-2.4.16-sparse/include/linux/skbuff.h	Fri Feb 14 14:27:45 2003 +0000
    32.3 @@ -0,0 +1,1186 @@
    32.4 +/*
    32.5 + *	Definitions for the 'struct sk_buff' memory handlers.
    32.6 + *
    32.7 + *	Authors:
    32.8 + *		Alan Cox, <gw4pts@gw4pts.ampr.org>
    32.9 + *		Florian La Roche, <rzsfl@rz.uni-sb.de>
   32.10 + *
   32.11 + *	This program is free software; you can redistribute it and/or
   32.12 + *	modify it under the terms of the GNU General Public License
   32.13 + *	as published by the Free Software Foundation; either version
   32.14 + *	2 of the License, or (at your option) any later version.
   32.15 + */
   32.16 + 
   32.17 +#ifndef _LINUX_SKBUFF_H
   32.18 +#define _LINUX_SKBUFF_H
   32.19 +
   32.20 +#include <linux/config.h>
   32.21 +#include <linux/kernel.h>
   32.22 +#include <linux/sched.h>
   32.23 +#include <linux/time.h>
   32.24 +#include <linux/cache.h>
   32.25 +
   32.26 +#include <asm/atomic.h>
   32.27 +#include <asm/types.h>
   32.28 +#include <linux/spinlock.h>
   32.29 +#include <linux/mm.h>
   32.30 +#include <linux/highmem.h>
   32.31 +
   32.32 +/* Zero Copy additions:
   32.33 + *
   32.34 + * (1) there are now two types of skb, as indicated by the skb_type field.
   32.35 + *     this is because, at least for the time being, there are two seperate types 
   32.36 + *     of memory that may be allocated to skb->data.
   32.37 + *
   32.38 + * (2) until discontiguous memory is fully supported, there will be a free list of pages
   32.39 + *     to be used by the net RX code.  This list will be allocated in the driver init code
   32.40 + *     but is declared here because the socket free code needs to return pages to it.
   32.41 + */
   32.42 +
   32.43 +// for skb->skb_type:
   32.44 +
   32.45 +#define SKB_NORMAL          0
   32.46 +#define SKB_ZERO_COPY       1
   32.47 +
   32.48 +#define NUM_NET_PAGES       9 // about 1Meg of buffers. (2^9)
   32.49 +
   32.50 +/*struct net_page_info {
   32.51 +        struct list_head list;
   32.52 +        unsigned long   virt_addr;
   32.53 +        unsigned long   ppte;
   32.54 +};
   32.55 +
   32.56 +extern char *net_page_chunk;
   32.57 +extern struct net_page_info *net_page_table;
   32.58 +extern struct list_head net_page_list;
   32.59 +extern spinlock_t net_page_list_lock;
   32.60 +extern unsigned int net_pages;
   32.61 +*/
   32.62 +/* End zero copy additions */
   32.63 +
   32.64 +#define HAVE_ALLOC_SKB		/* For the drivers to know */
   32.65 +#define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
   32.66 +#define SLAB_SKB 		/* Slabified skbuffs 	   */
   32.67 +
   32.68 +#define CHECKSUM_NONE 0
   32.69 +#define CHECKSUM_HW 1
   32.70 +#define CHECKSUM_UNNECESSARY 2
   32.71 +
   32.72 +#define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))
   32.73 +#define SKB_MAX_ORDER(X,ORDER)	(((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1))
   32.74 +#define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X),0))
   32.75 +#define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0,2))
   32.76 +
   32.77 +/* A. Checksumming of received packets by device.
   32.78 + *
   32.79 + *	NONE: device failed to checksum this packet.
   32.80 + *		skb->csum is undefined.
   32.81 + *
   32.82 + *	UNNECESSARY: device parsed packet and wouldbe verified checksum.
   32.83 + *		skb->csum is undefined.
   32.84 + *	      It is bad option, but, unfortunately, many of vendors do this.
   32.85 + *	      Apparently with secret goal to sell you new device, when you
   32.86 + *	      will add new protocol to your host. F.e. IPv6. 8)
   32.87 + *
   32.88 + *	HW: the most generic way. Device supplied checksum of _all_
   32.89 + *	    the packet as seen by netif_rx in skb->csum.
   32.90 + *	    NOTE: Even if device supports only some protocols, but
   32.91 + *	    is able to produce some skb->csum, it MUST use HW,
   32.92 + *	    not UNNECESSARY.
   32.93 + *
   32.94 + * B. Checksumming on output.
   32.95 + *
   32.96 + *	NONE: skb is checksummed by protocol or csum is not required.
   32.97 + *
   32.98 + *	HW: device is required to csum packet as seen by hard_start_xmit
   32.99 + *	from skb->h.raw to the end and to record the checksum
  32.100 + *	at skb->h.raw+skb->csum.
  32.101 + *
  32.102 + *	Device must show its capabilities in dev->features, set
  32.103 + *	at device setup time.
  32.104 + *	NETIF_F_HW_CSUM	- it is clever device, it is able to checksum
  32.105 + *			  everything.
  32.106 + *	NETIF_F_NO_CSUM - loopback or reliable single hop media.
  32.107 + *	NETIF_F_IP_CSUM - device is dumb. It is able to csum only
  32.108 + *			  TCP/UDP over IPv4. Sigh. Vendors like this
  32.109 + *			  way by an unknown reason. Though, see comment above
  32.110 + *			  about CHECKSUM_UNNECESSARY. 8)
  32.111 + *
  32.112 + *	Any questions? No questions, good. 		--ANK
  32.113 + */
  32.114 +
  32.115 +#ifdef __i386__
  32.116 +#define NET_CALLER(arg) (*(((void**)&arg)-1))
  32.117 +#else
  32.118 +#define NET_CALLER(arg) __builtin_return_address(0)
  32.119 +#endif
  32.120 +
  32.121 +#ifdef CONFIG_NETFILTER
  32.122 +struct nf_conntrack {
  32.123 +	atomic_t use;
  32.124 +	void (*destroy)(struct nf_conntrack *);
  32.125 +};
  32.126 +
  32.127 +struct nf_ct_info {
  32.128 +	struct nf_conntrack *master;
  32.129 +};
  32.130 +#endif
  32.131 +
  32.132 +struct sk_buff_head {
  32.133 +	/* These two members must be first. */
  32.134 +	struct sk_buff	* next;
  32.135 +	struct sk_buff	* prev;
  32.136 +
  32.137 +	__u32		qlen;
  32.138 +	spinlock_t	lock;
  32.139 +};
  32.140 +
  32.141 +struct sk_buff;
  32.142 +
  32.143 +#define MAX_SKB_FRAGS 6
  32.144 +
  32.145 +typedef struct skb_frag_struct skb_frag_t;
  32.146 +
  32.147 +struct skb_frag_struct
  32.148 +{
  32.149 +	struct page *page;
  32.150 +	__u16 page_offset;
  32.151 +	__u16 size;
  32.152 +};
  32.153 +
  32.154 +/* This data is invariant across clones and lives at
  32.155 + * the end of the header data, ie. at skb->end.
  32.156 + */
  32.157 +struct skb_shared_info {
  32.158 +	atomic_t	dataref;
  32.159 +	unsigned int	nr_frags;
  32.160 +	struct sk_buff	*frag_list;
  32.161 +	skb_frag_t	frags[MAX_SKB_FRAGS];
  32.162 +};
  32.163 +
  32.164 +struct sk_buff {
  32.165 +	/* These two members must be first. */
  32.166 +	struct sk_buff	* next;			/* Next buffer in list 				*/
  32.167 +	struct sk_buff	* prev;			/* Previous buffer in list 			*/
  32.168 +
  32.169 +	struct sk_buff_head * list;		/* List we are on				*/
  32.170 +	struct sock	*sk;			/* Socket we are owned by 			*/
  32.171 +	struct timeval	stamp;			/* Time we arrived				*/
  32.172 +	struct net_device	*dev;		/* Device we arrived on/are leaving by		*/
  32.173 +
  32.174 +	/* Transport layer header */
  32.175 +	union
  32.176 +	{
  32.177 +		struct tcphdr	*th;
  32.178 +		struct udphdr	*uh;
  32.179 +		struct icmphdr	*icmph;
  32.180 +		struct igmphdr	*igmph;
  32.181 +		struct iphdr	*ipiph;
  32.182 +		struct spxhdr	*spxh;
  32.183 +		unsigned char	*raw;
  32.184 +	} h;
  32.185 +
  32.186 +	/* Network layer header */
  32.187 +	union
  32.188 +	{
  32.189 +		struct iphdr	*iph;
  32.190 +		struct ipv6hdr	*ipv6h;
  32.191 +		struct arphdr	*arph;
  32.192 +		struct ipxhdr	*ipxh;
  32.193 +		unsigned char	*raw;
  32.194 +	} nh;
  32.195 +  
  32.196 +	/* Link layer header */
  32.197 +	union 
  32.198 +	{	
  32.199 +	  	struct ethhdr	*ethernet;
  32.200 +	  	unsigned char 	*raw;
  32.201 +	} mac;
  32.202 +
  32.203 +	struct  dst_entry *dst;
  32.204 +
  32.205 +	/* 
  32.206 +	 * This is the control buffer. It is free to use for every
  32.207 +	 * layer. Please put your private variables there. If you
  32.208 +	 * want to keep them across layers you have to do a skb_clone()
  32.209 +	 * first. This is owned by whoever has the skb queued ATM.
  32.210 +	 */ 
  32.211 +	char		cb[48];	 
  32.212 +
  32.213 +	unsigned int 	len;			/* Length of actual data			*/
  32.214 + 	unsigned int 	data_len;
  32.215 +	unsigned int	csum;			/* Checksum 					*/
  32.216 +	unsigned char 	__unused,		/* Dead field, may be reused			*/
  32.217 +			cloned, 		/* head may be cloned (check refcnt to be sure). */
  32.218 +  			pkt_type,		/* Packet class					*/
  32.219 +  			ip_summed;		/* Driver fed us an IP checksum			*/
  32.220 +	__u32		priority;		/* Packet queueing priority			*/
  32.221 +	atomic_t	users;			/* User count - see datagram.c,tcp.c 		*/
  32.222 +	unsigned short	protocol;		/* Packet protocol from driver. 		*/
  32.223 +	unsigned short	security;		/* Security level of packet			*/
  32.224 +	unsigned int	truesize;		/* Buffer size 					*/
  32.225 +
  32.226 +	unsigned char	*head;			/* Head of buffer 				*/
  32.227 +	unsigned char	*data;			/* Data head pointer				*/
  32.228 +	unsigned char	*tail;			/* Tail pointer					*/
  32.229 +	unsigned char 	*end;			/* End pointer					*/
  32.230 +
  32.231 +	void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
  32.232 +#ifdef CONFIG_NETFILTER
  32.233 +	/* Can be used for communication between hooks. */
  32.234 +        unsigned long	nfmark;
  32.235 +	/* Cache info */
  32.236 +	__u32		nfcache;
  32.237 +	/* Associated connection, if any */
  32.238 +	struct nf_ct_info *nfct;
  32.239 +#ifdef CONFIG_NETFILTER_DEBUG
  32.240 +        unsigned int nf_debug;
  32.241 +#endif
  32.242 +#endif /*CONFIG_NETFILTER*/
  32.243 +
  32.244 +#if defined(CONFIG_HIPPI)
  32.245 +	union{
  32.246 +		__u32	ifield;
  32.247 +	} private;
  32.248 +#endif
  32.249 +
  32.250 +#ifdef CONFIG_NET_SCHED
  32.251 +       __u32           tc_index;                /* traffic control index */
  32.252 +#endif
  32.253 +       unsigned int     skb_type;                /* for zero copy handling.                      */
  32.254 +       struct net_page_info *net_page;
  32.255 +};
  32.256 +
  32.257 +#define SK_WMEM_MAX	65535
  32.258 +#define SK_RMEM_MAX	65535
  32.259 +
  32.260 +#ifdef __KERNEL__
  32.261 +/*
  32.262 + *	Handling routines are only of interest to the kernel
  32.263 + */
  32.264 +#include <linux/slab.h>
  32.265 +
  32.266 +#include <asm/system.h>
  32.267 +
  32.268 +extern void			__kfree_skb(struct sk_buff *skb);
  32.269 +extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
  32.270 +extern struct sk_buff *         alloc_zc_skb(unsigned int size, int priority);
  32.271 +extern void			kfree_skbmem(struct sk_buff *skb);
  32.272 +extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
  32.273 +extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
  32.274 +extern struct sk_buff *		pskb_copy(struct sk_buff *skb, int gfp_mask);
  32.275 +extern int			pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask);
  32.276 +extern struct sk_buff *		skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom);
  32.277 +extern struct sk_buff *		skb_copy_expand(const struct sk_buff *skb, 
  32.278 +						int newheadroom,
  32.279 +						int newtailroom,
  32.280 +						int priority);
  32.281 +#define dev_kfree_skb(a)	kfree_skb(a)
  32.282 +extern void	skb_over_panic(struct sk_buff *skb, int len, void *here);
  32.283 +extern void	skb_under_panic(struct sk_buff *skb, int len, void *here);
  32.284 +
  32.285 +/* Internal */
  32.286 +#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
  32.287 +
  32.288 +/**
  32.289 + *	skb_queue_empty - check if a queue is empty
  32.290 + *	@list: queue head
  32.291 + *
  32.292 + *	Returns true if the queue is empty, false otherwise.
  32.293 + */
  32.294 + 
  32.295 +static inline int skb_queue_empty(struct sk_buff_head *list)
  32.296 +{
  32.297 +	return (list->next == (struct sk_buff *) list);
  32.298 +}
  32.299 +
  32.300 +/**
  32.301 + *	skb_get - reference buffer
  32.302 + *	@skb: buffer to reference
  32.303 + *
  32.304 + *	Makes another reference to a socket buffer and returns a pointer
  32.305 + *	to the buffer.
  32.306 + */
  32.307 + 
  32.308 +static inline struct sk_buff *skb_get(struct sk_buff *skb)
  32.309 +{
  32.310 +	atomic_inc(&skb->users);
  32.311 +	return skb;
  32.312 +}
  32.313 +
  32.314 +/*
  32.315 + * If users==1, we are the only owner and are can avoid redundant
  32.316 + * atomic change.
  32.317 + */
  32.318 + 
  32.319 +/**
  32.320 + *	kfree_skb - free an sk_buff
  32.321 + *	@skb: buffer to free
  32.322 + *
  32.323 + *	Drop a reference to the buffer and free it if the usage count has
  32.324 + *	hit zero.
  32.325 + */
  32.326 + 
  32.327 +static inline void kfree_skb(struct sk_buff *skb)
  32.328 +{
  32.329 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  32.330 +		__kfree_skb(skb);
  32.331 +}
  32.332 +
  32.333 +/* Use this if you didn't touch the skb state [for fast switching] */
  32.334 +static inline void kfree_skb_fast(struct sk_buff *skb)
  32.335 +{
  32.336 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  32.337 +		kfree_skbmem(skb);	
  32.338 +}
  32.339 +
  32.340 +/**
  32.341 + *	skb_cloned - is the buffer a clone
  32.342 + *	@skb: buffer to check
  32.343 + *
  32.344 + *	Returns true if the buffer was generated with skb_clone() and is
  32.345 + *	one of multiple shared copies of the buffer. Cloned buffers are
  32.346 + *	shared data so must not be written to under normal circumstances.
  32.347 + */
  32.348 +
  32.349 +static inline int skb_cloned(struct sk_buff *skb)
  32.350 +{
  32.351 +	return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
  32.352 +}
  32.353 +
  32.354 +/**
  32.355 + *	skb_shared - is the buffer shared
  32.356 + *	@skb: buffer to check
  32.357 + *
  32.358 + *	Returns true if more than one person has a reference to this
  32.359 + *	buffer.
  32.360 + */
  32.361 + 
  32.362 +static inline int skb_shared(struct sk_buff *skb)
  32.363 +{
  32.364 +	return (atomic_read(&skb->users) != 1);
  32.365 +}
  32.366 +
  32.367 +/** 
  32.368 + *	skb_share_check - check if buffer is shared and if so clone it
  32.369 + *	@skb: buffer to check
  32.370 + *	@pri: priority for memory allocation
  32.371 + *	
  32.372 + *	If the buffer is shared the buffer is cloned and the old copy
  32.373 + *	drops a reference. A new clone with a single reference is returned.
  32.374 + *	If the buffer is not shared the original buffer is returned. When
  32.375 + *	being called from interrupt status or with spinlocks held pri must
  32.376 + *	be GFP_ATOMIC.
  32.377 + *
  32.378 + *	NULL is returned on a memory allocation failure.
  32.379 + */
  32.380 + 
  32.381 +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
  32.382 +{
  32.383 +	if (skb_shared(skb)) {
  32.384 +		struct sk_buff *nskb;
  32.385 +		nskb = skb_clone(skb, pri);
  32.386 +		kfree_skb(skb);
  32.387 +		return nskb;
  32.388 +	}
  32.389 +	return skb;
  32.390 +}
  32.391 +
  32.392 +
  32.393 +/*
  32.394 + *	Copy shared buffers into a new sk_buff. We effectively do COW on
  32.395 + *	packets to handle cases where we have a local reader and forward
  32.396 + *	and a couple of other messy ones. The normal one is tcpdumping
  32.397 + *	a packet thats being forwarded.
  32.398 + */
  32.399 + 
  32.400 +/**
  32.401 + *	skb_unshare - make a copy of a shared buffer
  32.402 + *	@skb: buffer to check
  32.403 + *	@pri: priority for memory allocation
  32.404 + *
  32.405 + *	If the socket buffer is a clone then this function creates a new
  32.406 + *	copy of the data, drops a reference count on the old copy and returns
  32.407 + *	the new copy with the reference count at 1. If the buffer is not a clone
  32.408 + *	the original buffer is returned. When called with a spinlock held or
  32.409 + *	from interrupt state @pri must be %GFP_ATOMIC
  32.410 + *
  32.411 + *	%NULL is returned on a memory allocation failure.
  32.412 + */
  32.413 + 
  32.414 +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
  32.415 +{
  32.416 +	struct sk_buff *nskb;
  32.417 +	if(!skb_cloned(skb))
  32.418 +		return skb;
  32.419 +	nskb=skb_copy(skb, pri);
  32.420 +	kfree_skb(skb);		/* Free our shared copy */
  32.421 +	return nskb;
  32.422 +}
  32.423 +
  32.424 +/**
  32.425 + *	skb_peek
  32.426 + *	@list_: list to peek at
  32.427 + *
  32.428 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  32.429 + *	be careful with this one. A peek leaves the buffer on the
  32.430 + *	list and someone else may run off with it. You must hold
  32.431 + *	the appropriate locks or have a private queue to do this.
  32.432 + *
  32.433 + *	Returns %NULL for an empty list or a pointer to the head element.
  32.434 + *	The reference count is not incremented and the reference is therefore
  32.435 + *	volatile. Use with caution.
  32.436 + */
  32.437 + 
  32.438 +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
  32.439 +{
  32.440 +	struct sk_buff *list = ((struct sk_buff *)list_)->next;
  32.441 +	if (list == (struct sk_buff *)list_)
  32.442 +		list = NULL;
  32.443 +	return list;
  32.444 +}
  32.445 +
  32.446 +/**
  32.447 + *	skb_peek_tail
  32.448 + *	@list_: list to peek at
  32.449 + *
  32.450 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  32.451 + *	be careful with this one. A peek leaves the buffer on the
  32.452 + *	list and someone else may run off with it. You must hold
  32.453 + *	the appropriate locks or have a private queue to do this.
  32.454 + *
  32.455 + *	Returns %NULL for an empty list or a pointer to the tail element.
  32.456 + *	The reference count is not incremented and the reference is therefore
  32.457 + *	volatile. Use with caution.
  32.458 + */
  32.459 +
  32.460 +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
  32.461 +{
  32.462 +	struct sk_buff *list = ((struct sk_buff *)list_)->prev;
  32.463 +	if (list == (struct sk_buff *)list_)
  32.464 +		list = NULL;
  32.465 +	return list;
  32.466 +}
  32.467 +
  32.468 +/**
  32.469 + *	skb_queue_len	- get queue length
  32.470 + *	@list_: list to measure
  32.471 + *
  32.472 + *	Return the length of an &sk_buff queue. 
  32.473 + */
  32.474 + 
  32.475 +static inline __u32 skb_queue_len(struct sk_buff_head *list_)
  32.476 +{
  32.477 +	return(list_->qlen);
  32.478 +}
  32.479 +
  32.480 +static inline void skb_queue_head_init(struct sk_buff_head *list)
  32.481 +{
  32.482 +	spin_lock_init(&list->lock);
  32.483 +	list->prev = (struct sk_buff *)list;
  32.484 +	list->next = (struct sk_buff *)list;
  32.485 +	list->qlen = 0;
  32.486 +}
  32.487 +
  32.488 +/*
  32.489 + *	Insert an sk_buff at the start of a list.
  32.490 + *
  32.491 + *	The "__skb_xxxx()" functions are the non-atomic ones that
  32.492 + *	can only be called with interrupts disabled.
  32.493 + */
  32.494 +
  32.495 +/**
  32.496 + *	__skb_queue_head - queue a buffer at the list head
  32.497 + *	@list: list to use
  32.498 + *	@newsk: buffer to queue
  32.499 + *
  32.500 + *	Queue a buffer at the start of a list. This function takes no locks
  32.501 + *	and you must therefore hold required locks before calling it.
  32.502 + *
  32.503 + *	A buffer cannot be placed on two lists at the same time.
  32.504 + */	
  32.505 + 
  32.506 +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
  32.507 +{
  32.508 +	struct sk_buff *prev, *next;
  32.509 +
  32.510 +	newsk->list = list;
  32.511 +	list->qlen++;
  32.512 +	prev = (struct sk_buff *)list;
  32.513 +	next = prev->next;
  32.514 +	newsk->next = next;
  32.515 +	newsk->prev = prev;
  32.516 +	next->prev = newsk;
  32.517 +	prev->next = newsk;
  32.518 +}
  32.519 +
  32.520 +
  32.521 +/**
  32.522 + *	skb_queue_head - queue a buffer at the list head
  32.523 + *	@list: list to use
  32.524 + *	@newsk: buffer to queue
  32.525 + *
  32.526 + *	Queue a buffer at the start of the list. This function takes the
  32.527 + *	list lock and can be used safely with other locking &sk_buff functions
  32.528 + *	safely.
  32.529 + *
  32.530 + *	A buffer cannot be placed on two lists at the same time.
  32.531 + */	
  32.532 +
  32.533 +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
  32.534 +{
  32.535 +	unsigned long flags;
  32.536 +
  32.537 +	spin_lock_irqsave(&list->lock, flags);
  32.538 +	__skb_queue_head(list, newsk);
  32.539 +	spin_unlock_irqrestore(&list->lock, flags);
  32.540 +}
  32.541 +
  32.542 +/**
  32.543 + *	__skb_queue_tail - queue a buffer at the list tail
  32.544 + *	@list: list to use
  32.545 + *	@newsk: buffer to queue
  32.546 + *
  32.547 + *	Queue a buffer at the end of a list. This function takes no locks
  32.548 + *	and you must therefore hold required locks before calling it.
  32.549 + *
  32.550 + *	A buffer cannot be placed on two lists at the same time.
  32.551 + */	
  32.552 + 
  32.553 +
  32.554 +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
  32.555 +{
  32.556 +	struct sk_buff *prev, *next;
  32.557 +
  32.558 +	newsk->list = list;
  32.559 +	list->qlen++;
  32.560 +	next = (struct sk_buff *)list;
  32.561 +	prev = next->prev;
  32.562 +	newsk->next = next;
  32.563 +	newsk->prev = prev;
  32.564 +	next->prev = newsk;
  32.565 +	prev->next = newsk;
  32.566 +}
  32.567 +
  32.568 +/**
  32.569 + *	skb_queue_tail - queue a buffer at the list tail
  32.570 + *	@list: list to use
  32.571 + *	@newsk: buffer to queue
  32.572 + *
  32.573 + *	Queue a buffer at the tail of the list. This function takes the
  32.574 + *	list lock and can be used safely with other locking &sk_buff functions
  32.575 + *	safely.
  32.576 + *
  32.577 + *	A buffer cannot be placed on two lists at the same time.
  32.578 + */	
  32.579 +
  32.580 +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
  32.581 +{
  32.582 +	unsigned long flags;
  32.583 +
  32.584 +	spin_lock_irqsave(&list->lock, flags);
  32.585 +	__skb_queue_tail(list, newsk);
  32.586 +	spin_unlock_irqrestore(&list->lock, flags);
  32.587 +}
  32.588 +
  32.589 +/**
  32.590 + *	__skb_dequeue - remove from the head of the queue
  32.591 + *	@list: list to dequeue from
  32.592 + *
  32.593 + *	Remove the head of the list. This function does not take any locks
  32.594 + *	so must be used with appropriate locks held only. The head item is
  32.595 + *	returned or %NULL if the list is empty.
  32.596 + */
  32.597 +
  32.598 +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  32.599 +{
  32.600 +	struct sk_buff *next, *prev, *result;
  32.601 +
  32.602 +	prev = (struct sk_buff *) list;
  32.603 +	next = prev->next;
  32.604 +	result = NULL;
  32.605 +	if (next != prev) {
  32.606 +		result = next;
  32.607 +		next = next->next;
  32.608 +		list->qlen--;
  32.609 +		next->prev = prev;
  32.610 +		prev->next = next;
  32.611 +		result->next = NULL;
  32.612 +		result->prev = NULL;
  32.613 +		result->list = NULL;
  32.614 +	}
  32.615 +	return result;
  32.616 +}
  32.617 +
  32.618 +/**
  32.619 + *	skb_dequeue - remove from the head of the queue
  32.620 + *	@list: list to dequeue from
  32.621 + *
  32.622 + *	Remove the head of the list. The list lock is taken so the function
  32.623 + *	may be used safely with other locking list functions. The head item is
  32.624 + *	returned or %NULL if the list is empty.
  32.625 + */
  32.626 +
  32.627 +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
  32.628 +{
  32.629 +	long flags;
  32.630 +	struct sk_buff *result;
  32.631 +
  32.632 +	spin_lock_irqsave(&list->lock, flags);
  32.633 +	result = __skb_dequeue(list);
  32.634 +	spin_unlock_irqrestore(&list->lock, flags);
  32.635 +	return result;
  32.636 +}
  32.637 +
  32.638 +/*
  32.639 + *	Insert a packet on a list.
  32.640 + */
  32.641 +
  32.642 +static inline void __skb_insert(struct sk_buff *newsk,
  32.643 +	struct sk_buff * prev, struct sk_buff *next,
  32.644 +	struct sk_buff_head * list)
  32.645 +{
  32.646 +	newsk->next = next;
  32.647 +	newsk->prev = prev;
  32.648 +	next->prev = newsk;
  32.649 +	prev->next = newsk;
  32.650 +	newsk->list = list;
  32.651 +	list->qlen++;
  32.652 +}
  32.653 +
  32.654 +/**
  32.655 + *	skb_insert	-	insert a buffer
  32.656 + *	@old: buffer to insert before
  32.657 + *	@newsk: buffer to insert
  32.658 + *
  32.659 + *	Place a packet before a given packet in a list. The list locks are taken
  32.660 + *	and this function is atomic with respect to other list locked calls
  32.661 + *	A buffer cannot be placed on two lists at the same time.
  32.662 + */
  32.663 +
  32.664 +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
  32.665 +{
  32.666 +	unsigned long flags;
  32.667 +
  32.668 +	spin_lock_irqsave(&old->list->lock, flags);
  32.669 +	__skb_insert(newsk, old->prev, old, old->list);
  32.670 +	spin_unlock_irqrestore(&old->list->lock, flags);
  32.671 +}
  32.672 +
  32.673 +/*
  32.674 + *	Place a packet after a given packet in a list.
  32.675 + */
  32.676 +
  32.677 +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
  32.678 +{
  32.679 +	__skb_insert(newsk, old, old->next, old->list);
  32.680 +}
  32.681 +
  32.682 +/**
  32.683 + *	skb_append	-	append a buffer
  32.684 + *	@old: buffer to insert after
  32.685 + *	@newsk: buffer to insert
  32.686 + *
  32.687 + *	Place a packet after a given packet in a list. The list locks are taken
  32.688 + *	and this function is atomic with respect to other list locked calls.
  32.689 + *	A buffer cannot be placed on two lists at the same time.
  32.690 + */
  32.691 +
  32.692 +
  32.693 +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
  32.694 +{
  32.695 +	unsigned long flags;
  32.696 +
  32.697 +	spin_lock_irqsave(&old->list->lock, flags);
  32.698 +	__skb_append(old, newsk);
  32.699 +	spin_unlock_irqrestore(&old->list->lock, flags);
  32.700 +}
  32.701 +
  32.702 +/*
  32.703 + * remove sk_buff from list. _Must_ be called atomically, and with
  32.704 + * the list known..
  32.705 + */
  32.706 + 
  32.707 +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  32.708 +{
  32.709 +	struct sk_buff * next, * prev;
  32.710 +
  32.711 +	list->qlen--;
  32.712 +	next = skb->next;
  32.713 +	prev = skb->prev;
  32.714 +	skb->next = NULL;
  32.715 +	skb->prev = NULL;
  32.716 +	skb->list = NULL;
  32.717 +	next->prev = prev;
  32.718 +	prev->next = next;
  32.719 +}
  32.720 +
  32.721 +/**
  32.722 + *	skb_unlink	-	remove a buffer from a list
  32.723 + *	@skb: buffer to remove
  32.724 + *
   32.725 + *	Remove a packet from a list. The list locks are taken and this
   32.726 + *	function is atomic with respect to other list locked calls
  32.727 + *	
  32.728 + *	Works even without knowing the list it is sitting on, which can be 
  32.729 + *	handy at times. It also means that THE LIST MUST EXIST when you 
  32.730 + *	unlink. Thus a list must have its contents unlinked before it is
  32.731 + *	destroyed.
  32.732 + */
  32.733 +
  32.734 +static inline void skb_unlink(struct sk_buff *skb)
  32.735 +{
  32.736 +	struct sk_buff_head *list = skb->list;
  32.737 +
  32.738 +	if(list) {
  32.739 +		unsigned long flags;
  32.740 +
  32.741 +		spin_lock_irqsave(&list->lock, flags);
  32.742 +		if(skb->list == list)
  32.743 +			__skb_unlink(skb, skb->list);
  32.744 +		spin_unlock_irqrestore(&list->lock, flags);
  32.745 +	}
  32.746 +}
  32.747 +
  32.748 +/* XXX: more streamlined implementation */
  32.749 +
  32.750 +/**
  32.751 + *	__skb_dequeue_tail - remove from the tail of the queue
  32.752 + *	@list: list to dequeue from
  32.753 + *
  32.754 + *	Remove the tail of the list. This function does not take any locks
  32.755 + *	so must be used with appropriate locks held only. The tail item is
  32.756 + *	returned or %NULL if the list is empty.
  32.757 + */
  32.758 +
  32.759 +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
  32.760 +{
  32.761 +	struct sk_buff *skb = skb_peek_tail(list); 
  32.762 +	if (skb)
  32.763 +		__skb_unlink(skb, list);
  32.764 +	return skb;
  32.765 +}
  32.766 +
  32.767 +/**
   32.768 + *	skb_dequeue_tail - remove from the tail of the queue
   32.769 + *	@list: list to dequeue from
   32.770 + *
   32.771 + *	Remove the tail of the list. The list lock is taken so the function
   32.772 + *	may be used safely with other locking list functions. The tail item is
   32.773 + *	returned or %NULL if the list is empty.
  32.774 + */
  32.775 +
  32.776 +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
  32.777 +{
  32.778 +	long flags;
  32.779 +	struct sk_buff *result;
  32.780 +
  32.781 +	spin_lock_irqsave(&list->lock, flags);
  32.782 +	result = __skb_dequeue_tail(list);
  32.783 +	spin_unlock_irqrestore(&list->lock, flags);
  32.784 +	return result;
  32.785 +}
  32.786 +
  32.787 +static inline int skb_is_nonlinear(const struct sk_buff *skb)
  32.788 +{
  32.789 +	return skb->data_len;
  32.790 +}
  32.791 +
  32.792 +static inline int skb_headlen(const struct sk_buff *skb)
  32.793 +{
  32.794 +	return skb->len - skb->data_len;
  32.795 +}
  32.796 +
  32.797 +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) BUG(); } while (0)
  32.798 +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) BUG(); } while (0)
  32.799 +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0)
  32.800 +
  32.801 +/*
  32.802 + *	Add data to an sk_buff
  32.803 + */
  32.804 + 
  32.805 +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
  32.806 +{
  32.807 +	unsigned char *tmp=skb->tail;
  32.808 +	SKB_LINEAR_ASSERT(skb);
  32.809 +	skb->tail+=len;
  32.810 +	skb->len+=len;
  32.811 +	return tmp;
  32.812 +}
  32.813 +
  32.814 +/**
  32.815 + *	skb_put - add data to a buffer
  32.816 + *	@skb: buffer to use 
  32.817 + *	@len: amount of data to add
  32.818 + *
  32.819 + *	This function extends the used data area of the buffer. If this would
  32.820 + *	exceed the total buffer size the kernel will panic. A pointer to the
  32.821 + *	first byte of the extra data is returned.
  32.822 + */
  32.823 + 
  32.824 +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
  32.825 +{
  32.826 +	unsigned char *tmp=skb->tail;
  32.827 +	SKB_LINEAR_ASSERT(skb);
  32.828 +	skb->tail+=len;
  32.829 +	skb->len+=len;
  32.830 +	if(skb->tail>skb->end) {
  32.831 +		skb_over_panic(skb, len, current_text_addr());
  32.832 +	}
  32.833 +	return tmp;
  32.834 +}
  32.835 +
  32.836 +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
  32.837 +{
  32.838 +	skb->data-=len;
  32.839 +	skb->len+=len;
  32.840 +	return skb->data;
  32.841 +}
  32.842 +
  32.843 +/**
  32.844 + *	skb_push - add data to the start of a buffer
  32.845 + *	@skb: buffer to use 
  32.846 + *	@len: amount of data to add
  32.847 + *
  32.848 + *	This function extends the used data area of the buffer at the buffer
  32.849 + *	start. If this would exceed the total buffer headroom the kernel will
  32.850 + *	panic. A pointer to the first byte of the extra data is returned.
  32.851 + */
  32.852 +
  32.853 +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
  32.854 +{
  32.855 +	skb->data-=len;
  32.856 +	skb->len+=len;
  32.857 +	if(skb->data<skb->head) {
  32.858 +		skb_under_panic(skb, len, current_text_addr());
  32.859 +	}
  32.860 +	return skb->data;
  32.861 +}
  32.862 +
  32.863 +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
  32.864 +{
  32.865 +	skb->len-=len;
  32.866 +	if (skb->len < skb->data_len)
  32.867 +		BUG();
  32.868 +	return 	skb->data+=len;
  32.869 +}
  32.870 +
  32.871 +/**
  32.872 + *	skb_pull - remove data from the start of a buffer
  32.873 + *	@skb: buffer to use 
  32.874 + *	@len: amount of data to remove
  32.875 + *
  32.876 + *	This function removes data from the start of a buffer, returning
  32.877 + *	the memory to the headroom. A pointer to the next data in the buffer
  32.878 + *	is returned. Once the data has been pulled future pushes will overwrite
  32.879 + *	the old data.
  32.880 + */
  32.881 +
  32.882 +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
  32.883 +{	
  32.884 +	if (len > skb->len)
  32.885 +		return NULL;
  32.886 +	return __skb_pull(skb,len);
  32.887 +}
  32.888 +
  32.889 +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta);
  32.890 +
  32.891 +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
  32.892 +{
  32.893 +	if (len > skb_headlen(skb) &&
  32.894 +	    __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL)
  32.895 +		return NULL;
  32.896 +	skb->len -= len;
  32.897 +	return 	skb->data += len;
  32.898 +}
  32.899 +
  32.900 +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
  32.901 +{	
  32.902 +	if (len > skb->len)
  32.903 +		return NULL;
  32.904 +	return __pskb_pull(skb,len);
  32.905 +}
  32.906 +
  32.907 +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
  32.908 +{
  32.909 +	if (len <= skb_headlen(skb))
  32.910 +		return 1;
  32.911 +	if (len > skb->len)
  32.912 +		return 0;
  32.913 +	return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL);
  32.914 +}
  32.915 +
  32.916 +/**
  32.917 + *	skb_headroom - bytes at buffer head
  32.918 + *	@skb: buffer to check
  32.919 + *
  32.920 + *	Return the number of bytes of free space at the head of an &sk_buff.
  32.921 + */
  32.922 + 
  32.923 +static inline int skb_headroom(const struct sk_buff *skb)
  32.924 +{
  32.925 +	return skb->data-skb->head;
  32.926 +}
  32.927 +
  32.928 +/**
  32.929 + *	skb_tailroom - bytes at buffer end
  32.930 + *	@skb: buffer to check
  32.931 + *
  32.932 + *	Return the number of bytes of free space at the tail of an sk_buff
  32.933 + */
  32.934 +
  32.935 +static inline int skb_tailroom(const struct sk_buff *skb)
  32.936 +{
  32.937 +	return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail;
  32.938 +}
  32.939 +
  32.940 +/**
  32.941 + *	skb_reserve - adjust headroom
  32.942 + *	@skb: buffer to alter
  32.943 + *	@len: bytes to move
  32.944 + *
  32.945 + *	Increase the headroom of an empty &sk_buff by reducing the tail
  32.946 + *	room. This is only allowed for an empty buffer.
  32.947 + */
  32.948 +
  32.949 +static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
  32.950 +{
  32.951 +	skb->data+=len;
  32.952 +	skb->tail+=len;
  32.953 +}
  32.954 +
  32.955 +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
  32.956 +
  32.957 +static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
  32.958 +{
  32.959 +	if (!skb->data_len) {
  32.960 +		skb->len = len;
  32.961 +		skb->tail = skb->data+len;
  32.962 +	} else {
  32.963 +		___pskb_trim(skb, len, 0);
  32.964 +	}
  32.965 +}
  32.966 +
  32.967 +/**
  32.968 + *	skb_trim - remove end from a buffer
  32.969 + *	@skb: buffer to alter
  32.970 + *	@len: new length
  32.971 + *
  32.972 + *	Cut the length of a buffer down by removing data from the tail. If
  32.973 + *	the buffer is already under the length specified it is not modified.
  32.974 + */
  32.975 +
  32.976 +static inline void skb_trim(struct sk_buff *skb, unsigned int len)
  32.977 +{
  32.978 +	if (skb->len > len) {
  32.979 +		__skb_trim(skb, len);
  32.980 +	}
  32.981 +}
  32.982 +
  32.983 +
  32.984 +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
  32.985 +{
  32.986 +	if (!skb->data_len) {
  32.987 +		skb->len = len;
  32.988 +		skb->tail = skb->data+len;
  32.989 +		return 0;
  32.990 +	} else {
  32.991 +		return ___pskb_trim(skb, len, 1);
  32.992 +	}
  32.993 +}
  32.994 +
  32.995 +static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
  32.996 +{
  32.997 +	if (len < skb->len)
  32.998 +		return __pskb_trim(skb, len);
  32.999 +	return 0;
 32.1000 +}
 32.1001 +
 32.1002 +/**
 32.1003 + *	skb_orphan - orphan a buffer
 32.1004 + *	@skb: buffer to orphan
 32.1005 + *
 32.1006 + *	If a buffer currently has an owner then we call the owner's
 32.1007 + *	destructor function and make the @skb unowned. The buffer continues
 32.1008 + *	to exist but is no longer charged to its former owner.
 32.1009 + */
 32.1010 +
 32.1011 +
 32.1012 +static inline void skb_orphan(struct sk_buff *skb)
 32.1013 +{
 32.1014 +	if (skb->destructor)
 32.1015 +		skb->destructor(skb);
 32.1016 +	skb->destructor = NULL;
 32.1017 +	skb->sk = NULL;
 32.1018 +}
 32.1019 +
 32.1020 +/**
  32.1021 + *	skb_queue_purge - empty a list
 32.1022 + *	@list: list to empty
 32.1023 + *
 32.1024 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 32.1025 + *	the list and one reference dropped. This function takes the list
 32.1026 + *	lock and is atomic with respect to other list locking functions.
 32.1027 + */
 32.1028 +
 32.1029 +
 32.1030 +static inline void skb_queue_purge(struct sk_buff_head *list)
 32.1031 +{
 32.1032 +	struct sk_buff *skb;
 32.1033 +	while ((skb=skb_dequeue(list))!=NULL)
 32.1034 +		kfree_skb(skb);
 32.1035 +}
 32.1036 +
 32.1037 +/**
  32.1038 + *	__skb_queue_purge - empty a list
 32.1039 + *	@list: list to empty
 32.1040 + *
 32.1041 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 32.1042 + *	the list and one reference dropped. This function does not take the
 32.1043 + *	list lock and the caller must hold the relevant locks to use it.
 32.1044 + */
 32.1045 +
 32.1046 +
 32.1047 +static inline void __skb_queue_purge(struct sk_buff_head *list)
 32.1048 +{
 32.1049 +	struct sk_buff *skb;
 32.1050 +	while ((skb=__skb_dequeue(list))!=NULL)
 32.1051 +		kfree_skb(skb);
 32.1052 +}
 32.1053 +
 32.1054 +/**
 32.1055 + *	__dev_alloc_skb - allocate an skbuff for sending
 32.1056 + *	@length: length to allocate
 32.1057 + *	@gfp_mask: get_free_pages mask, passed to alloc_skb
 32.1058 + *
 32.1059 + *	Allocate a new &sk_buff and assign it a usage count of one. The
 32.1060 + *	buffer has unspecified headroom built in. Users should allocate
 32.1061 + *	the headroom they think they need without accounting for the
 32.1062 + *	built in space. The built in space is used for optimisations.
 32.1063 + *
  32.1064 + *	%NULL is returned if there is no free memory.
 32.1065 + */
 32.1066 + 
 32.1067 +static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
 32.1068 +					      int gfp_mask)
 32.1069 +{
 32.1070 +	struct sk_buff *skb;
 32.1071 +
 32.1072 +	skb = alloc_skb(length+16, gfp_mask);
 32.1073 +        //skb = alloc_zc_skb(length+16, gfp_mask);
 32.1074 +	if (skb)
 32.1075 +		skb_reserve(skb,16);
 32.1076 +	return skb;
 32.1077 +}
 32.1078 +
 32.1079 +/**
 32.1080 + *	dev_alloc_skb - allocate an skbuff for sending
 32.1081 + *	@length: length to allocate
 32.1082 + *
 32.1083 + *	Allocate a new &sk_buff and assign it a usage count of one. The
 32.1084 + *	buffer has unspecified headroom built in. Users should allocate
 32.1085 + *	the headroom they think they need without accounting for the
 32.1086 + *	built in space. The built in space is used for optimisations.
 32.1087 + *
  32.1088 + *	%NULL is returned if there is no free memory. Although this function
 32.1089 + *	allocates memory it can be called from an interrupt.
 32.1090 + */
 32.1091 + 
 32.1092 +static inline struct sk_buff *dev_alloc_skb(unsigned int length)
 32.1093 +{
 32.1094 +	return __dev_alloc_skb(length, GFP_ATOMIC);
 32.1095 +}
 32.1096 +
 32.1097 +/**
 32.1098 + *	skb_cow - copy header of skb when it is required
 32.1099 + *	@skb: buffer to cow
 32.1100 + *	@headroom: needed headroom
 32.1101 + *
 32.1102 + *	If the skb passed lacks sufficient headroom or its data part
 32.1103 + *	is shared, data is reallocated. If reallocation fails, an error
 32.1104 + *	is returned and original skb is not changed.
 32.1105 + *
 32.1106 + *	The result is skb with writable area skb->head...skb->tail
 32.1107 + *	and at least @headroom of space at head.
 32.1108 + */
 32.1109 +
 32.1110 +static inline int
 32.1111 +skb_cow(struct sk_buff *skb, unsigned int headroom)
 32.1112 +{
 32.1113 +	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
 32.1114 +
 32.1115 +	if (delta < 0)
 32.1116 +		delta = 0;
 32.1117 +
 32.1118 +	if (delta || skb_cloned(skb))
 32.1119 +		return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
 32.1120 +	return 0;
 32.1121 +}
 32.1122 +
 32.1123 +/**
 32.1124 + *	skb_linearize - convert paged skb to linear one
  32.1125 + *	@skb: buffer to linearize
 32.1126 + *	@gfp: allocation mode
 32.1127 + *
 32.1128 + *	If there is no free memory -ENOMEM is returned, otherwise zero
 32.1129 + *	is returned and the old skb data released.  */
 32.1130 +int skb_linearize(struct sk_buff *skb, int gfp);
 32.1131 +
 32.1132 +static inline void *kmap_skb_frag(const skb_frag_t *frag)
 32.1133 +{
 32.1134 +#ifdef CONFIG_HIGHMEM
 32.1135 +	if (in_irq())
 32.1136 +		BUG();
 32.1137 +
 32.1138 +	local_bh_disable();
 32.1139 +#endif
 32.1140 +	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
 32.1141 +}
 32.1142 +
 32.1143 +static inline void kunmap_skb_frag(void *vaddr)
 32.1144 +{
 32.1145 +	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
 32.1146 +#ifdef CONFIG_HIGHMEM
 32.1147 +	local_bh_enable();
 32.1148 +#endif
 32.1149 +}
 32.1150 +
 32.1151 +#define skb_queue_walk(queue, skb) \
 32.1152 +		for (skb = (queue)->next;			\
 32.1153 +		     (skb != (struct sk_buff *)(queue));	\
 32.1154 +		     skb=skb->next)
 32.1155 +
 32.1156 +
 32.1157 +extern struct sk_buff *		skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
 32.1158 +extern unsigned int		datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait);
 32.1159 +extern int			skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
 32.1160 +extern int			skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size);
 32.1161 +extern int			skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
 32.1162 +extern int			skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov);
 32.1163 +extern void			skb_free_datagram(struct sock * sk, struct sk_buff *skb);
 32.1164 +
 32.1165 +extern unsigned int		skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
 32.1166 +extern int			skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
 32.1167 +extern unsigned int		skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
 32.1168 +extern void			skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 32.1169 +
 32.1170 +extern void skb_init(void);
 32.1171 +extern void skb_add_mtu(int mtu);
 32.1172 +
 32.1173 +#ifdef CONFIG_NETFILTER
 32.1174 +static inline void
 32.1175 +nf_conntrack_put(struct nf_ct_info *nfct)
 32.1176 +{
 32.1177 +	if (nfct && atomic_dec_and_test(&nfct->master->use))
 32.1178 +		nfct->master->destroy(nfct->master);
 32.1179 +}
 32.1180 +static inline void
 32.1181 +nf_conntrack_get(struct nf_ct_info *nfct)
 32.1182 +{
 32.1183 +	if (nfct)
 32.1184 +		atomic_inc(&nfct->master->use);
 32.1185 +}
 32.1186 +#endif
 32.1187 +
 32.1188 +#endif	/* __KERNEL__ */
 32.1189 +#endif	/* _LINUX_SKBUFF_H */
    33.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.2 +++ b/xenolinux-2.4.16-sparse/net/core/skbuff.c	Fri Feb 14 14:27:45 2003 +0000
    33.3 @@ -0,0 +1,1373 @@
    33.4 +/*
    33.5 + *	Routines having to do with the 'struct sk_buff' memory handlers.
    33.6 + *
    33.7 + *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
    33.8 + *			Florian La Roche <rzsfl@rz.uni-sb.de>
    33.9 + *
   33.10 + *	Version:	$Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $
   33.11 + *
   33.12 + *	Fixes:	
   33.13 + *		Alan Cox	:	Fixed the worst of the load balancer bugs.
   33.14 + *		Dave Platt	:	Interrupt stacking fix.
   33.15 + *	Richard Kooijman	:	Timestamp fixes.
   33.16 + *		Alan Cox	:	Changed buffer format.
   33.17 + *		Alan Cox	:	destructor hook for AF_UNIX etc.
   33.18 + *		Linus Torvalds	:	Better skb_clone.
   33.19 + *		Alan Cox	:	Added skb_copy.
   33.20 + *		Alan Cox	:	Added all the changed routines Linus
   33.21 + *					only put in the headers
   33.22 + *		Ray VanTassle	:	Fixed --skb->lock in free
   33.23 + *		Alan Cox	:	skb_copy copy arp field
   33.24 + *		Andi Kleen	:	slabified it.
   33.25 + *
   33.26 + *	NOTE:
   33.27 + *		The __skb_ routines should be called with interrupts 
   33.28 + *	disabled, or you better be *real* sure that the operation is atomic 
   33.29 + *	with respect to whatever list is being frobbed (e.g. via lock_sock()
   33.30 + *	or via disabling bottom half handlers, etc).
   33.31 + *
   33.32 + *	This program is free software; you can redistribute it and/or
   33.33 + *	modify it under the terms of the GNU General Public License
   33.34 + *	as published by the Free Software Foundation; either version
   33.35 + *	2 of the License, or (at your option) any later version.
   33.36 + */
   33.37 +
   33.38 +/*
   33.39 + *	The functions in this file will not compile correctly with gcc 2.4.x
   33.40 + */
   33.41 +
   33.42 +#include <linux/config.h>
   33.43 +#include <linux/types.h>
   33.44 +#include <linux/kernel.h>
   33.45 +#include <linux/sched.h>
   33.46 +#include <linux/mm.h>
   33.47 +#include <linux/interrupt.h>
   33.48 +#include <linux/in.h>
   33.49 +#include <linux/inet.h>
   33.50 +#include <linux/slab.h>
   33.51 +#include <linux/netdevice.h>
   33.52 +#include <linux/string.h>
   33.53 +#include <linux/skbuff.h>
   33.54 +#include <linux/cache.h>
   33.55 +#include <linux/init.h>
   33.56 +#include <linux/highmem.h>
   33.57 +#include <linux/spinlock.h>
   33.58 +
   33.59 +#include <net/ip.h>
   33.60 +#include <net/protocol.h>
   33.61 +#include <net/dst.h>
   33.62 +#include <net/tcp.h>
   33.63 +#include <net/udp.h>
   33.64 +#include <net/sock.h>
   33.65 +#include <asm/io.h>
   33.66 +#include <asm/uaccess.h>
   33.67 +#include <asm/system.h>
   33.68 +
   33.69 +/* zc globals: */
   33.70 +/*
   33.71 +char *net_page_chunk;
   33.72 +struct net_page_info *net_page_table;
   33.73 +struct list_head net_page_list;
   33.74 +spinlock_t net_page_list_lock = SPIN_LOCK_UNLOCKED;
   33.75 +unsigned int net_pages;
   33.76 +*/
   33.77 +
   33.78 +
   33.79 +int sysctl_hot_list_len = 128;
   33.80 +
   33.81 +static kmem_cache_t *skbuff_head_cache;
   33.82 +
   33.83 +static union {
   33.84 +	struct sk_buff_head	list;
   33.85 +	char			pad[SMP_CACHE_BYTES];
   33.86 +} skb_head_pool[NR_CPUS];
   33.87 +
   33.88 +/*
   33.89 + *	Keep out-of-line to prevent kernel bloat.
   33.90 + *	__builtin_return_address is not used because it is not always
   33.91 + *	reliable. 
   33.92 + */
   33.93 +
   33.94 +/**
   33.95 + *	skb_over_panic	- 	private function
   33.96 + *	@skb: buffer
   33.97 + *	@sz: size
   33.98 + *	@here: address
   33.99 + *
  33.100 + *	Out of line support code for skb_put(). Not user callable.
  33.101 + */
  33.102 + 
  33.103 +void skb_over_panic(struct sk_buff *skb, int sz, void *here)
  33.104 +{
  33.105 +	printk("skput:over: %p:%d put:%d dev:%s", 
  33.106 +		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
  33.107 +	BUG();
  33.108 +}
  33.109 +
  33.110 +/**
  33.111 + *	skb_under_panic	- 	private function
  33.112 + *	@skb: buffer
  33.113 + *	@sz: size
  33.114 + *	@here: address
  33.115 + *
  33.116 + *	Out of line support code for skb_push(). Not user callable.
  33.117 + */
  33.118 + 
  33.119 +
  33.120 +void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  33.121 +{
  33.122 +        printk("skput:under: %p:%d put:%d dev:%s",
  33.123 +                here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
  33.124 +	BUG();
  33.125 +}
  33.126 +
  33.127 +static __inline__ struct sk_buff *skb_head_from_pool(void)
  33.128 +{
  33.129 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
  33.130 +
  33.131 +	if (skb_queue_len(list)) {
  33.132 +		struct sk_buff *skb;
  33.133 +		unsigned long flags;
  33.134 +
  33.135 +		local_irq_save(flags);
  33.136 +		skb = __skb_dequeue(list);
  33.137 +		local_irq_restore(flags);
  33.138 +		return skb;
  33.139 +	}
  33.140 +	return NULL;
  33.141 +}
  33.142 +
  33.143 +static __inline__ void skb_head_to_pool(struct sk_buff *skb)
  33.144 +{
  33.145 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
  33.146 +
  33.147 +	if (skb_queue_len(list) < sysctl_hot_list_len) {
  33.148 +		unsigned long flags;
  33.149 +
  33.150 +		local_irq_save(flags);
  33.151 +		__skb_queue_head(list, skb);
  33.152 +		local_irq_restore(flags);
  33.153 +
  33.154 +		return;
  33.155 +	}
  33.156 +	kmem_cache_free(skbuff_head_cache, skb);
  33.157 +}
  33.158 +
  33.159 +
  33.160 +/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  33.161 + *	'private' fields and also do memory statistics to find all the
  33.162 + *	[BEEP] leaks.
  33.163 + * 
  33.164 + */
  33.165 +
  33.166 +/**
  33.167 + *	alloc_skb	-	allocate a network buffer
  33.168 + *	@size: size to allocate
  33.169 + *	@gfp_mask: allocation mask
  33.170 + *
  33.171 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
  33.172 + *	tail room of size bytes. The object has a reference count of one.
  33.173 + *	The return is the buffer. On a failure the return is %NULL.
  33.174 + *
  33.175 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
  33.176 + *	%GFP_ATOMIC.
  33.177 + */
  33.178 + 
  33.179 +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
  33.180 +{
  33.181 +	struct sk_buff *skb;
  33.182 +	u8 *data;
  33.183 +
  33.184 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
  33.185 +		static int count = 0;
  33.186 +		if (++count < 5) {
  33.187 +			printk(KERN_ERR "alloc_skb called nonatomically "
  33.188 +			       "from interrupt %p\n", NET_CALLER(size));
  33.189 + 			BUG();
  33.190 +		}
  33.191 +		gfp_mask &= ~__GFP_WAIT;
  33.192 +	}
  33.193 +
  33.194 +	/* Get the HEAD */
  33.195 +	skb = skb_head_from_pool();
  33.196 +	if (skb == NULL) {
  33.197 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
  33.198 +		if (skb == NULL)
  33.199 +			goto nohead;
  33.200 +	}
  33.201 +
  33.202 +	/* Get the DATA. Size must match skb_add_mtu(). */
  33.203 +	size = SKB_DATA_ALIGN(size);
  33.204 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  33.205 +	if (data == NULL)
  33.206 +		goto nodata;
  33.207 +
  33.208 +	/* XXX: does not include slab overhead */ 
  33.209 +	skb->truesize = size + sizeof(struct sk_buff);
  33.210 +
  33.211 +	/* Load the data pointers. */
  33.212 +	skb->head = data;
  33.213 +	skb->data = data;
  33.214 +	skb->tail = data;
  33.215 +	skb->end = data + size;
  33.216 +
  33.217 +	/* Set up other state */
  33.218 +	skb->len = 0;
  33.219 +	skb->cloned = 0;
  33.220 +	skb->data_len = 0;
  33.221 +        skb->skb_type = SKB_NORMAL;
  33.222 +
  33.223 +	atomic_set(&skb->users, 1); 
  33.224 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  33.225 +	skb_shinfo(skb)->nr_frags = 0;
  33.226 +	skb_shinfo(skb)->frag_list = NULL;
  33.227 +	return skb;
  33.228 +
  33.229 +nodata:
  33.230 +	skb_head_to_pool(skb);
  33.231 +nohead:
  33.232 +	return NULL;
  33.233 +}
  33.234 +
  33.235 +/* begin zc code additions: */
  33.236 +/*
  33.237 +void init_net_pages(unsigned long order_pages)
  33.238 +{
  33.239 +        int i;
  33.240 +        struct net_page_info *np;
  33.241 +        pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
  33.242 +        unsigned long nr_pages = 1 << order_pages;
  33.243 +        
  33.244 +        net_page_chunk = (char *)__get_free_pages(GFP_KERNEL, order_pages);
  33.245 +        net_page_table = kmalloc(nr_pages * sizeof(struct net_page_info), GFP_KERNEL);
  33.246 +
  33.247 +        INIT_LIST_HEAD(&net_page_list);
  33.248 +
  33.249 +        for (i = 0; i < nr_pages; i++) 
  33.250 +        {
  33.251 +                np = net_page_table + i;
  33.252 +                np->virt_addr = (unsigned long)net_page_chunk + (i * PAGE_SIZE);
  33.253 +
  33.254 +                // now fill the pte pointer:
  33.255 +                //np->ppte = 0xdeadbeef;
  33.256 +                //pgd = pgd_offset_k(np->virt_addr);
  33.257 +                //if (pgd_none(*pgd) || pgd_bad(*pgd)) BUG();
  33.258 +
  33.259 +                //if (pmd_none(*pmd)) BUG(); 
  33.260 +                //if (pmd_bad(*pmd)) BUG();
  33.261 +
  33.262 +                //ptep = pte_offset(pmd, np->virt_addr);
  33.263 +                //np->ppte = phys_to_machine(virt_to_phys(ptep));
  33.264 +                
  33.265 +                list_add_tail(&np->list, &net_page_list);
  33.266 +        }
  33.267 +        net_pages = nr_pages;
  33.268 +        
  33.269 +
  33.270 +}
  33.271 +
  33.272 +struct net_page_info *get_net_page(void)
  33.273 +{
  33.274 +
  33.275 +    struct list_head *list_ptr;
  33.276 +    struct net_page_info *np;
  33.277 +    unsigned long flags;
  33.278 +
  33.279 +    if (!net_pages) 
  33.280 +    {
  33.281 +            return NULL;
  33.282 +    }
  33.283 +    spin_lock_irqsave(&net_page_list_lock, flags);
  33.284 +    
  33.285 +    list_ptr = net_page_list.next;
  33.286 +    np = list_entry(list_ptr, struct net_page_info, list);
  33.287 +    list_del(&np->list);
  33.288 +    net_pages--;
  33.289 +    
  33.290 +    spin_unlock_irqrestore(&net_page_list_lock, flags);
  33.291 +    
  33.292 +    return np;
  33.293 +}
  33.294 +
  33.295 +void free_net_page(struct net_page_info *np)
  33.296 +{
  33.297 +    unsigned long flags;
  33.298 +  
  33.299 +    if (np == NULL) return;
  33.300 +    
  33.301 +    spin_lock_irqsave(&net_page_list_lock, flags);
  33.302 +    
  33.303 +    list_add(&np->list, &net_page_list);
  33.304 +    net_pages++;
  33.305 +
  33.306 +    spin_unlock_irqrestore(&net_page_list_lock, flags);
  33.307 +
  33.308 +}
  33.309 +*/
  33.310 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
  33.311 +{
  33.312 +	struct sk_buff *skb;
  33.313 +	u8 *data;
  33.314 +
  33.315 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
  33.316 +		static int count = 0;
  33.317 +		if (++count < 5) {
  33.318 +			printk(KERN_ERR "alloc_skb called nonatomically "
  33.319 +			       "from interrupt %p\n", NET_CALLER(size));
  33.320 + 			BUG();
  33.321 +		}
  33.322 +		gfp_mask &= ~__GFP_WAIT;
  33.323 +	}
  33.324 +
  33.325 +	/* Get the HEAD */
  33.326 +	skb = skb_head_from_pool();
  33.327 +	if (skb == NULL) {
  33.328 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
  33.329 +		if (skb == NULL)
  33.330 +			goto nohead;
  33.331 +	}
  33.332 +
  33.333 +	/* Get the DATA. Size must match skb_add_mtu(). */
  33.334 +	size = SKB_DATA_ALIGN(size);
  33.335 +        if (size > PAGE_SIZE)
  33.336 +        {
  33.337 +                printk("alloc_zc_skb called with unruly size.\n");
  33.338 +                size = PAGE_SIZE;
  33.339 +        }
  33.340 +	/*skb->net_page = get_net_page();
  33.341 +        if (skb->net_page == NULL)
  33.342 +        {
  33.343 +                goto nodata;
  33.344 +        }
  33.345 +        data = (u8 *)skb->net_page->virt_addr;*/
  33.346 +        data = (char *)__get_free_page(gfp_mask);
  33.347 +	if (data == NULL)
  33.348 +		goto nodata;
  33.349 +	/* XXX: does not include slab overhead */ 
  33.350 +	skb->truesize = size + sizeof(struct sk_buff);
  33.351 +
  33.352 +	/* Load the data pointers. */
  33.353 +	skb->head = data;
  33.354 +	skb->data = data;
  33.355 +	skb->tail = data;
  33.356 +	skb->end = data + size;
  33.357 +
  33.358 +	/* Set up other state */
  33.359 +	skb->len = 0;
  33.360 +	skb->cloned = 0;
  33.361 +	skb->data_len = 0;
  33.362 +        skb->skb_type = SKB_ZERO_COPY;
  33.363 +
  33.364 +	atomic_set(&skb->users, 1); 
  33.365 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  33.366 +	skb_shinfo(skb)->nr_frags = 0;
  33.367 +	skb_shinfo(skb)->frag_list = NULL;
  33.368 +	return skb;
  33.369 +
  33.370 +nodata:
  33.371 +	skb_head_to_pool(skb);
  33.372 +nohead:
  33.373 +	return NULL;
  33.374 +}
  33.375 +
  33.376 +/* end zc code additions: */
  33.377 +
  33.378 +/*
  33.379 + *	Slab constructor for a skb head. 
  33.380 + */ 
  33.381 +static inline void skb_headerinit(void *p, kmem_cache_t *cache, 
  33.382 +				  unsigned long flags)
  33.383 +{
  33.384 +	struct sk_buff *skb = p;
  33.385 +
  33.386 +	skb->next = NULL;
  33.387 +	skb->prev = NULL;
  33.388 +	skb->list = NULL;
  33.389 +	skb->sk = NULL;
  33.390 +	skb->stamp.tv_sec=0;	/* No idea about time */
  33.391 +	skb->dev = NULL;
  33.392 +	skb->dst = NULL;
  33.393 +	memset(skb->cb, 0, sizeof(skb->cb));
  33.394 +	skb->pkt_type = PACKET_HOST;	/* Default type */
  33.395 +	skb->ip_summed = 0;
  33.396 +	skb->priority = 0;
  33.397 +	skb->security = 0;	/* By default packets are insecure */
  33.398 +	skb->destructor = NULL;
  33.399 +
  33.400 +#ifdef CONFIG_NETFILTER
  33.401 +	skb->nfmark = skb->nfcache = 0;
  33.402 +	skb->nfct = NULL;
  33.403 +#ifdef CONFIG_NETFILTER_DEBUG
  33.404 +	skb->nf_debug = 0;
  33.405 +#endif
  33.406 +#endif
  33.407 +#ifdef CONFIG_NET_SCHED
  33.408 +	skb->tc_index = 0;
  33.409 +#endif
  33.410 +}
  33.411 +
  33.412 +static void skb_drop_fraglist(struct sk_buff *skb)
  33.413 +{
  33.414 +	struct sk_buff *list = skb_shinfo(skb)->frag_list;
  33.415 +
  33.416 +	skb_shinfo(skb)->frag_list = NULL;
  33.417 +
  33.418 +	do {
  33.419 +		struct sk_buff *this = list;
  33.420 +		list = list->next;
  33.421 +		kfree_skb(this);
  33.422 +	} while (list);
  33.423 +}
  33.424 +
  33.425 +static void skb_clone_fraglist(struct sk_buff *skb)
  33.426 +{
  33.427 +	struct sk_buff *list;
  33.428 +
  33.429 +	for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
  33.430 +		skb_get(list);
  33.431 +}
  33.432 +
  33.433 +static void skb_release_data(struct sk_buff *skb)
  33.434 +{
  33.435 +        if (!skb->cloned ||
  33.436 +	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
  33.437 +		if (skb_shinfo(skb)->nr_frags) {
  33.438 +			int i;
  33.439 +			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 
  33.440 +{
  33.441 +				put_page(skb_shinfo(skb)->frags[i].page);
  33.442 +}
  33.443 +		}
  33.444 +
  33.445 +		if (skb_shinfo(skb)->frag_list)
  33.446 +			skb_drop_fraglist(skb);
  33.447 +
  33.448 +                if (skb->skb_type == SKB_NORMAL)
  33.449 +                {
  33.450 +		    kfree(skb->head);
  33.451 +                } else {// SKB_ZERO_COPY
  33.452 +                    //free_net_page(skb->net_page);
  33.453 +//printk(KERN_ALERT "<%p>\n", phys_to_machine(virt_to_phys(skb->head)));
  33.454 +                    free_page((void *)skb->head);
  33.455 +                }
  33.456 +	}
  33.457 +
  33.458 +}
  33.459 +
  33.460 +/*
  33.461 + *	Free an skbuff by memory without cleaning the state. 
  33.462 + */
  33.463 +void kfree_skbmem(struct sk_buff *skb)
  33.464 +{
  33.465 +	skb_release_data(skb);
  33.466 +	skb_head_to_pool(skb);
  33.467 +}
  33.468 +
  33.469 +/**
  33.470 + *	__kfree_skb - private function 
  33.471 + *	@skb: buffer
  33.472 + *
  33.473 + *	Free an sk_buff. Release anything attached to the buffer. 
  33.474 + *	Clean the state. This is an internal helper function. Users should
  33.475 + *	always call kfree_skb
  33.476 + */
  33.477 +
  33.478 +void __kfree_skb(struct sk_buff *skb)
  33.479 +{
  33.480 +	if (skb->list) {
  33.481 +	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
  33.482 +		       "on a list (from %p).\n", NET_CALLER(skb));
  33.483 +		BUG();
  33.484 +	}
  33.485 +
  33.486 +	dst_release(skb->dst);
  33.487 +	if(skb->destructor) {
  33.488 +		if (in_irq()) {
  33.489 +			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
  33.490 +				NET_CALLER(skb));
  33.491 +		}
  33.492 +		skb->destructor(skb);
  33.493 +	}
  33.494 +#ifdef CONFIG_NETFILTER
  33.495 +	nf_conntrack_put(skb->nfct);
  33.496 +#endif
  33.497 +	skb_headerinit(skb, NULL, 0);  /* clean state */
  33.498 +	kfree_skbmem(skb);
  33.499 +}
  33.500 +
  33.501 +/**
  33.502 + *	skb_clone	-	duplicate an sk_buff
  33.503 + *	@skb: buffer to clone
  33.504 + *	@gfp_mask: allocation priority
  33.505 + *
  33.506 + *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
  33.507 + *	copies share the same packet data but not structure. The new
  33.508 + *	buffer has a reference count of 1. If the allocation fails the 
  33.509 + *	function returns %NULL otherwise the new buffer is returned.
  33.510 + *	
  33.511 + *	If this function is called from an interrupt gfp_mask() must be
  33.512 + *	%GFP_ATOMIC.
  33.513 + */
  33.514 +
  33.515 +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
  33.516 +{
  33.517 +	struct sk_buff *n;
  33.518 +
  33.519 +	n = skb_head_from_pool();
  33.520 +	if (!n) {
  33.521 +		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
  33.522 +		if (!n)
  33.523 +			return NULL;
  33.524 +	}
  33.525 +
  33.526 +#define C(x) n->x = skb->x
  33.527 +
  33.528 +	n->next = n->prev = NULL;
  33.529 +	n->list = NULL;
  33.530 +	n->sk = NULL;
  33.531 +	C(stamp);
  33.532 +	C(dev);
  33.533 +	C(h);
  33.534 +	C(nh);
  33.535 +	C(mac);
  33.536 +	C(dst);
  33.537 +	dst_clone(n->dst);
  33.538 +	memcpy(n->cb, skb->cb, sizeof(skb->cb));
  33.539 +	C(len);
  33.540 +	C(data_len);
  33.541 +	C(csum);
  33.542 +	n->cloned = 1;
  33.543 +	C(pkt_type);
  33.544 +	C(ip_summed);
  33.545 +	C(priority);
  33.546 +	atomic_set(&n->users, 1);
  33.547 +	C(protocol);
  33.548 +	C(security);
  33.549 +	C(truesize);
  33.550 +	C(head);
  33.551 +	C(data);
  33.552 +	C(tail);
  33.553 +	C(end);
  33.554 +	n->destructor = NULL;
  33.555 +#ifdef CONFIG_NETFILTER
  33.556 +	C(nfmark);
  33.557 +	C(nfcache);
  33.558 +	C(nfct);
  33.559 +#ifdef CONFIG_NETFILTER_DEBUG
  33.560 +	C(nf_debug);
  33.561 +#endif
  33.562 +#endif /*CONFIG_NETFILTER*/
  33.563 +#if defined(CONFIG_HIPPI)
  33.564 +	C(private);
  33.565 +#endif
  33.566 +#ifdef CONFIG_NET_SCHED
  33.567 +	C(tc_index);
  33.568 +#endif
  33.569 +        C(skb_type);
  33.570 +        //C(net_page);
  33.571 +	atomic_inc(&(skb_shinfo(skb)->dataref));
  33.572 +	skb->cloned = 1;
  33.573 +#ifdef CONFIG_NETFILTER
  33.574 +	nf_conntrack_get(skb->nfct);
  33.575 +#endif
  33.576 +	return n;
  33.577 +}
  33.578 +
  33.579 +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
  33.580 +{
  33.581 +	/*
  33.582 +	 *	Shift between the two data areas in bytes
  33.583 +	 */
  33.584 +	unsigned long offset = new->data - old->data;
  33.585 +
  33.586 +	new->list=NULL;
  33.587 +	new->sk=NULL;
  33.588 +	new->dev=old->dev;
  33.589 +	new->priority=old->priority;
  33.590 +	new->protocol=old->protocol;
  33.591 +	new->dst=dst_clone(old->dst);
  33.592 +	new->h.raw=old->h.raw+offset;
  33.593 +	new->nh.raw=old->nh.raw+offset;
  33.594 +	new->mac.raw=old->mac.raw+offset;
  33.595 +	memcpy(new->cb, old->cb, sizeof(old->cb));
  33.596 +	atomic_set(&new->users, 1);
  33.597 +	new->pkt_type=old->pkt_type;
  33.598 +	new->stamp=old->stamp;
  33.599 +	new->destructor = NULL;
  33.600 +	new->security=old->security;
  33.601 +#ifdef CONFIG_NETFILTER
  33.602 +	new->nfmark=old->nfmark;
  33.603 +	new->nfcache=old->nfcache;
  33.604 +	new->nfct=old->nfct;
  33.605 +	nf_conntrack_get(new->nfct);
  33.606 +#ifdef CONFIG_NETFILTER_DEBUG
  33.607 +	new->nf_debug=old->nf_debug;
  33.608 +#endif
  33.609 +#endif
  33.610 +#ifdef CONFIG_NET_SCHED
  33.611 +	new->tc_index = old->tc_index;
  33.612 +#endif
  33.613 +}
  33.614 +
  33.615 +/**
  33.616 + *	skb_copy	-	create private copy of an sk_buff
  33.617 + *	@skb: buffer to copy
  33.618 + *	@gfp_mask: allocation priority
  33.619 + *
  33.620 + *	Make a copy of both an &sk_buff and its data. This is used when the
  33.621 + *	caller wishes to modify the data and needs a private copy of the 
  33.622 + *	data to alter. Returns %NULL on failure or the pointer to the buffer
  33.623 + *	on success. The returned buffer has a reference count of 1.
  33.624 + *
  33.625 + *	As by-product this function converts non-linear &sk_buff to linear
  33.626 + *	one, so that &sk_buff becomes completely private and caller is allowed
  33.627 + *	to modify all the data of returned buffer. This means that this
  33.628 + *	function is not recommended for use in circumstances when only
  33.629 + *	header is going to be modified. Use pskb_copy() instead.
  33.630 + */
  33.631 + 
  33.632 +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
  33.633 +{
  33.634 +	struct sk_buff *n;
  33.635 +	int headerlen = skb->data-skb->head;
  33.636 +
  33.637 +	/*
  33.638 +	 *	Allocate the copy buffer
  33.639 +	 */
  33.640 +	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
  33.641 +	if(n==NULL)
  33.642 +		return NULL;
  33.643 +
  33.644 +	/* Set the data pointer */
  33.645 +	skb_reserve(n,headerlen);
  33.646 +	/* Set the tail pointer and length */
  33.647 +	skb_put(n,skb->len);
  33.648 +	n->csum = skb->csum;
  33.649 +	n->ip_summed = skb->ip_summed;
  33.650 +
  33.651 +	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
  33.652 +		BUG();
  33.653 +
  33.654 +	copy_skb_header(n, skb);
  33.655 +
  33.656 +	return n;
  33.657 +}
  33.658 +
  33.659 +/* Keep head the same: replace data */
  33.660 +int skb_linearize(struct sk_buff *skb, int gfp_mask)
  33.661 +{
  33.662 +	unsigned int size;
  33.663 +	u8 *data;
  33.664 +	long offset;
  33.665 +	int headerlen = skb->data - skb->head;
  33.666 +	int expand = (skb->tail+skb->data_len) - skb->end;
  33.667 +
  33.668 +	if (skb_shared(skb))
  33.669 +		BUG();
  33.670 +
  33.671 +	if (expand <= 0)
  33.672 +		expand = 0;
  33.673 +
  33.674 +	size = (skb->end - skb->head + expand);
  33.675 +	size = SKB_DATA_ALIGN(size);
  33.676 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  33.677 +	if (data == NULL)
  33.678 +		return -ENOMEM;
  33.679 +
  33.680 +	/* Copy entire thing */
  33.681 +	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
  33.682 +		BUG();
  33.683 +
  33.684 +	/* Offset between the two in bytes */
  33.685 +	offset = data - skb->head;
  33.686 +
  33.687 +	/* Free old data. */
  33.688 +	skb_release_data(skb);
  33.689 +
  33.690 +	skb->head = data;
  33.691 +	skb->end  = data + size;
  33.692 +
  33.693 +	/* Set up new pointers */
  33.694 +	skb->h.raw += offset;
  33.695 +	skb->nh.raw += offset;
  33.696 +	skb->mac.raw += offset;
  33.697 +	skb->tail += offset;
  33.698 +	skb->data += offset;
  33.699 +
  33.700 +	/* Set up shinfo */
  33.701 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  33.702 +	skb_shinfo(skb)->nr_frags = 0;
  33.703 +	skb_shinfo(skb)->frag_list = NULL;
  33.704 +
  33.705 +	/* We are no longer a clone, even if we were. */
  33.706 +	skb->cloned = 0;
  33.707 +
  33.708 +	skb->tail += skb->data_len;
  33.709 +	skb->data_len = 0;
  33.710 +	return 0;
  33.711 +}
  33.712 +
  33.713 +
  33.714 +/**
  33.715 + *	pskb_copy	-	create copy of an sk_buff with private head.
  33.716 + *	@skb: buffer to copy
  33.717 + *	@gfp_mask: allocation priority
  33.718 + *
  33.719 + *	Make a copy of both an &sk_buff and part of its data, located
  33.720 + *	in header. Fragmented data remain shared. This is used when
  33.721 + *	the caller wishes to modify only header of &sk_buff and needs
  33.722 + *	private copy of the header to alter. Returns %NULL on failure
  33.723 + *	or the pointer to the buffer on success.
  33.724 + *	The returned buffer has a reference count of 1.
  33.725 + */
  33.726 +
  33.727 +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
  33.728 +{
  33.729 +	struct sk_buff *n;
  33.730 +
  33.731 +	/*
  33.732 +	 *	Allocate the copy buffer
  33.733 +	 */
  33.734 +	n=alloc_skb(skb->end - skb->head, gfp_mask);
  33.735 +	if(n==NULL)
  33.736 +		return NULL;
  33.737 +
  33.738 +	/* Set the data pointer */
  33.739 +	skb_reserve(n,skb->data-skb->head);
  33.740 +	/* Set the tail pointer and length */
  33.741 +	skb_put(n,skb_headlen(skb));
  33.742 +	/* Copy the bytes */
  33.743 +	memcpy(n->data, skb->data, n->len);
  33.744 +	n->csum = skb->csum;
  33.745 +	n->ip_summed = skb->ip_summed;
  33.746 +
  33.747 +	n->data_len = skb->data_len;
  33.748 +	n->len = skb->len;
  33.749 +
  33.750 +	if (skb_shinfo(skb)->nr_frags) {
  33.751 +		int i;
  33.752 +
  33.753 +		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  33.754 +			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
  33.755 +			get_page(skb_shinfo(n)->frags[i].page);
  33.756 +		}
  33.757 +		skb_shinfo(n)->nr_frags = i;
  33.758 +	}
  33.759 +
  33.760 +	if (skb_shinfo(skb)->frag_list) {
  33.761 +		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
  33.762 +		skb_clone_fraglist(n);
  33.763 +	}
  33.764 +
  33.765 +	copy_skb_header(n, skb);
  33.766 +
  33.767 +	return n;
  33.768 +}
  33.769 +
  33.770 +/**
  33.771 + *	pskb_expand_head - reallocate header of &sk_buff
  33.772 + *	@skb: buffer to reallocate
  33.773 + *	@nhead: room to add at head
  33.774 + *	@ntail: room to add at tail
  33.775 + *	@gfp_mask: allocation priority
  33.776 + *
  33.777 + *	Expands (or creates identical copy, if &nhead and &ntail are zero)
  33.778 + *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
  33.779 + *	reference count of 1. Returns zero in the case of success or error,
  33.780 + *	if expansion failed. In the last case, &sk_buff is not changed.
  33.781 + *
  33.782 + *	All the pointers pointing into skb header may change and must be
  33.783 + *	reloaded after call to this function.
  33.784 + */
  33.785 +
  33.786 +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
  33.787 +{
  33.788 +	int i;
  33.789 +	u8 *data;
  33.790 +	int size = nhead + (skb->end - skb->head) + ntail;
  33.791 +	long off;
  33.792 +
  33.793 +	if (skb_shared(skb))
  33.794 +		BUG();
  33.795 +
  33.796 +	size = SKB_DATA_ALIGN(size);
  33.797 +
  33.798 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  33.799 +	if (data == NULL)
  33.800 +		goto nodata;
  33.801 +
  33.802 +	/* Copy only real data... and, alas, header. This should be
  33.803 +	 * optimized for the cases when header is void. */
  33.804 +	memcpy(data+nhead, skb->head, skb->tail-skb->head);
  33.805 +	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
  33.806 +
  33.807 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
  33.808 +		get_page(skb_shinfo(skb)->frags[i].page);
  33.809 +
  33.810 +	if (skb_shinfo(skb)->frag_list)
  33.811 +		skb_clone_fraglist(skb);
  33.812 +
  33.813 +	skb_release_data(skb);
  33.814 +
  33.815 +	off = (data+nhead) - skb->head;
  33.816 +
  33.817 +	skb->head = data;
  33.818 +	skb->end  = data+size;
  33.819 +
  33.820 +	skb->data += off;
  33.821 +	skb->tail += off;
  33.822 +	skb->mac.raw += off;
  33.823 +	skb->h.raw += off;
  33.824 +	skb->nh.raw += off;
  33.825 +	skb->cloned = 0;
  33.826 +	atomic_set(&skb_shinfo(skb)->dataref, 1);
  33.827 +	return 0;
  33.828 +
  33.829 +nodata:
  33.830 +	return -ENOMEM;
  33.831 +}
  33.832 +
  33.833 +/* Make private copy of skb with writable head and some headroom */
  33.834 +
  33.835 +struct sk_buff *
  33.836 +skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
  33.837 +{
  33.838 +	struct sk_buff *skb2;
  33.839 +	int delta = headroom - skb_headroom(skb);
  33.840 +
  33.841 +	if (delta <= 0)
  33.842 +		return pskb_copy(skb, GFP_ATOMIC);
  33.843 +
  33.844 +	skb2 = skb_clone(skb, GFP_ATOMIC);
  33.845 +	if (skb2 == NULL ||
  33.846 +	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
  33.847 +		return skb2;
  33.848 +
  33.849 +	kfree_skb(skb2);
  33.850 +	return NULL;
  33.851 +}
  33.852 +
  33.853 +
  33.854 +/**
  33.855 + *	skb_copy_expand	-	copy and expand sk_buff
  33.856 + *	@skb: buffer to copy
  33.857 + *	@newheadroom: new free bytes at head
  33.858 + *	@newtailroom: new free bytes at tail
  33.859 + *	@gfp_mask: allocation priority
  33.860 + *
  33.861 + *	Make a copy of both an &sk_buff and its data and while doing so 
  33.862 + *	allocate additional space.
  33.863 + *
  33.864 + *	This is used when the caller wishes to modify the data and needs a 
  33.865 + *	private copy of the data to alter as well as more space for new fields.
  33.866 + *	Returns %NULL on failure or the pointer to the buffer
  33.867 + *	on success. The returned buffer has a reference count of 1.
  33.868 + *
  33.869 + *	You must pass %GFP_ATOMIC as the allocation priority if this function
  33.870 + *	is called from an interrupt.
  33.871 + */
  33.872 + 
  33.873 +
  33.874 +struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
  33.875 +				int newheadroom,
  33.876 +				int newtailroom,
  33.877 +				int gfp_mask)
  33.878 +{
  33.879 +	struct sk_buff *n;
  33.880 +
  33.881 +	/*
  33.882 +	 *	Allocate the copy buffer
  33.883 +	 */
  33.884 + 	 
  33.885 +	n=alloc_skb(newheadroom + skb->len + newtailroom,
  33.886 +		    gfp_mask);
  33.887 +	if(n==NULL)
  33.888 +		return NULL;
  33.889 +
  33.890 +	skb_reserve(n,newheadroom);
  33.891 +
  33.892 +	/* Set the tail pointer and length */
  33.893 +	skb_put(n,skb->len);
  33.894 +
  33.895 +	/* Copy the data only. */
  33.896 +	if (skb_copy_bits(skb, 0, n->data, skb->len))
  33.897 +		BUG();
  33.898 +
  33.899 +	copy_skb_header(n, skb);
  33.900 +	return n;
  33.901 +}
  33.902 +
  33.903 +/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
  33.904 + * If realloc==0 and trimming is impossible without change of data,
  33.905 + * it is BUG().
  33.906 + */
  33.907 +
  33.908 +int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
  33.909 +{
  33.910 +	int offset = skb_headlen(skb);
  33.911 +	int nfrags = skb_shinfo(skb)->nr_frags;
  33.912 +	int i;
  33.913 +
  33.914 +	for (i=0; i<nfrags; i++) {
  33.915 +		int end = offset + skb_shinfo(skb)->frags[i].size;
  33.916 +		if (end > len) {
  33.917 +			if (skb_cloned(skb)) {
  33.918 +				if (!realloc)
  33.919 +					BUG();
  33.920 +				if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
  33.921 +					return -ENOMEM;
  33.922 +			}
  33.923 +			if (len <= offset) {
  33.924 +				put_page(skb_shinfo(skb)->frags[i].page);
  33.925 +				skb_shinfo(skb)->nr_frags--;
  33.926 +			} else {
  33.927 +				skb_shinfo(skb)->frags[i].size = len-offset;
  33.928 +			}
  33.929 +		}
  33.930 +		offset = end;
  33.931 +	}
  33.932 +
  33.933 +	if (offset < len) {
  33.934 +		skb->data_len -= skb->len - len;
  33.935 +		skb->len = len;
  33.936 +	} else {
  33.937 +		if (len <= skb_headlen(skb)) {
  33.938 +			skb->len = len;
  33.939 +			skb->data_len = 0;
  33.940 +			skb->tail = skb->data + len;
  33.941 +			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
  33.942 +				skb_drop_fraglist(skb);
  33.943 +		} else {
  33.944 +			skb->data_len -= skb->len - len;
  33.945 +			skb->len = len;
  33.946 +		}
  33.947 +	}
  33.948 +
  33.949 +	return 0;
  33.950 +}
  33.951 +
  33.952 +/**
  33.953 + *	__pskb_pull_tail - advance tail of skb header 
  33.954 + *	@skb: buffer to reallocate
  33.955 + *	@delta: number of bytes to advance tail
  33.956 + *
  33.957 + *	The function makes a sense only on a fragmented &sk_buff,
  33.958 + *	it expands header moving its tail forward and copying necessary
  33.959 + *	data from fragmented part.
  33.960 + *
  33.961 + *	&sk_buff MUST have reference count of 1.
  33.962 + *
  33.963 + *	Returns %NULL (and &sk_buff does not change) if pull failed
  33.964 + *	or value of new tail of skb in the case of success.
  33.965 + *
  33.966 + *	All the pointers pointing into skb header may change and must be
  33.967 + *	reloaded after call to this function.
  33.968 + */
  33.969 +
  33.970 +/* Moves tail of skb head forward, copying data from fragmented part,
  33.971 + * when it is necessary.
  33.972 + * 1. It may fail due to malloc failure.
  33.973 + * 2. It may change skb pointers.
  33.974 + *
  33.975 + * It is pretty complicated. Luckily, it is called only in exceptional cases.
  33.976 + */
  33.977 +unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
  33.978 +{
  33.979 +	int i, k, eat;
  33.980 +
  33.981 +	/* If skb has not enough free space at tail, get new one
  33.982 +	 * plus 128 bytes for future expansions. If we have enough
  33.983 +	 * room at tail, reallocate without expansion only if skb is cloned.
  33.984 +	 */
  33.985 +	eat = (skb->tail+delta) - skb->end;
  33.986 +
  33.987 +	if (eat > 0 || skb_cloned(skb)) {
  33.988 +		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
  33.989 +			return NULL;
  33.990 +	}
  33.991 +
  33.992 +	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
  33.993 +		BUG();
  33.994 +
  33.995 +	/* Optimization: no fragments, no reasons to preestimate
  33.996 +	 * size of pulled pages. Superb.
  33.997 +	 */
  33.998 +	if (skb_shinfo(skb)->frag_list == NULL)
  33.999 +		goto pull_pages;
 33.1000 +
 33.1001 +	/* Estimate size of pulled pages. */
 33.1002 +	eat = delta;
 33.1003 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 33.1004 +		if (skb_shinfo(skb)->frags[i].size >= eat)
 33.1005 +			goto pull_pages;
 33.1006 +		eat -= skb_shinfo(skb)->frags[i].size;
 33.1007 +	}
 33.1008 +
 33.1009 +	/* If we need update frag list, we are in troubles.
 33.1010 +	 * Certainly, it possible to add an offset to skb data,
 33.1011 +	 * but taking into account that pulling is expected to
 33.1012 +	 * be very rare operation, it is worth to fight against
 33.1013 +	 * further bloating skb head and crucify ourselves here instead.
 33.1014 +	 * Pure masohism, indeed. 8)8)
 33.1015 +	 */
 33.1016 +	if (eat) {
 33.1017 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 33.1018 +		struct sk_buff *clone = NULL;
 33.1019 +		struct sk_buff *insp = NULL;
 33.1020 +
 33.1021 +		do {
 33.1022 +			if (list == NULL)
 33.1023 +				BUG();
 33.1024 +
 33.1025 +			if (list->len <= eat) {
 33.1026 +				/* Eaten as whole. */
 33.1027 +				eat -= list->len;
 33.1028 +				list = list->next;
 33.1029 +				insp = list;
 33.1030 +			} else {
 33.1031 +				/* Eaten partially. */
 33.1032 +
 33.1033 +				if (skb_shared(list)) {
 33.1034 +					/* Sucks! We need to fork list. :-( */
 33.1035 +					clone = skb_clone(list, GFP_ATOMIC);
 33.1036 +					if (clone == NULL)
 33.1037 +						return NULL;
 33.1038 +					insp = list->next;
 33.1039 +					list = clone;
 33.1040 +				} else {
 33.1041 +					/* This may be pulled without
 33.1042 +					 * problems. */
 33.1043 +					insp = list;
 33.1044 +				}
 33.1045 +				if (pskb_pull(list, eat) == NULL) {
 33.1046 +					if (clone)
 33.1047 +						kfree_skb(clone);
 33.1048 +					return NULL;
 33.1049 +				}
 33.1050 +				break;
 33.1051 +			}
 33.1052 +		} while (eat);
 33.1053 +
 33.1054 +		/* Free pulled out fragments. */
 33.1055 +		while ((list = skb_shinfo(skb)->frag_list) != insp) {
 33.1056 +			skb_shinfo(skb)->frag_list = list->next;
 33.1057 +			kfree_skb(list);
 33.1058 +		}
 33.1059 +		/* And insert new clone at head. */
 33.1060 +		if (clone) {
 33.1061 +			clone->next = list;
 33.1062 +			skb_shinfo(skb)->frag_list = clone;
 33.1063 +		}
 33.1064 +	}
 33.1065 +	/* Success! Now we may commit changes to skb data. */
 33.1066 +
 33.1067 +pull_pages:
 33.1068 +	eat = delta;
 33.1069 +	k = 0;
 33.1070 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 33.1071 +		if (skb_shinfo(skb)->frags[i].size <= eat) {
 33.1072 +			put_page(skb_shinfo(skb)->frags[i].page);
 33.1073 +			eat -= skb_shinfo(skb)->frags[i].size;
 33.1074 +		} else {
 33.1075 +			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
 33.1076 +			if (eat) {
 33.1077 +				skb_shinfo(skb)->frags[k].page_offset += eat;
 33.1078 +				skb_shinfo(skb)->frags[k].size -= eat;
 33.1079 +				eat = 0;
 33.1080 +			}
 33.1081 +			k++;
 33.1082 +		}
 33.1083 +	}
 33.1084 +	skb_shinfo(skb)->nr_frags = k;
 33.1085 +
 33.1086 +	skb->tail += delta;
 33.1087 +	skb->data_len -= delta;
 33.1088 +
 33.1089 +	return skb->tail;
 33.1090 +}
 33.1091 +
 33.1092 +/* Copy some data bits from skb to kernel buffer. */
 33.1093 +
 33.1094 +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 33.1095 +{
 33.1096 +	int i, copy;
 33.1097 +	int start = skb->len - skb->data_len;
 33.1098 +
 33.1099 +	if (offset > (int)skb->len-len)
 33.1100 +		goto fault;
 33.1101 +
 33.1102 +	/* Copy header. */
 33.1103 +	if ((copy = start-offset) > 0) {
 33.1104 +		if (copy > len)
 33.1105 +			copy = len;
 33.1106 +		memcpy(to, skb->data + offset, copy);
 33.1107 +		if ((len -= copy) == 0)
 33.1108 +			return 0;
 33.1109 +		offset += copy;
 33.1110 +		to += copy;
 33.1111 +	}
 33.1112 +
 33.1113 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 33.1114 +		int end;
 33.1115 +
 33.1116 +		BUG_TRAP(start <= offset+len);
 33.1117 +
 33.1118 +		end = start + skb_shinfo(skb)->frags[i].size;
 33.1119 +		if ((copy = end-offset) > 0) {
 33.1120 +			u8 *vaddr;
 33.1121 +
 33.1122 +			if (copy > len)
 33.1123 +				copy = len;
 33.1124 +
 33.1125 +			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
 33.1126 +			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
 33.1127 +			       offset-start, copy);
 33.1128 +			kunmap_skb_frag(vaddr);
 33.1129 +
 33.1130 +			if ((len -= copy) == 0)
 33.1131 +				return 0;
 33.1132 +			offset += copy;
 33.1133 +			to += copy;
 33.1134 +		}
 33.1135 +		start = end;
 33.1136 +	}
 33.1137 +
 33.1138 +	if (skb_shinfo(skb)->frag_list) {
 33.1139 +		struct sk_buff *list;
 33.1140 +
 33.1141 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 33.1142 +			int end;
 33.1143 +
 33.1144 +			BUG_TRAP(start <= offset+len);
 33.1145 +
 33.1146 +			end = start + list->len;
 33.1147 +			if ((copy = end-offset) > 0) {
 33.1148 +				if (copy > len)
 33.1149 +					copy = len;
 33.1150 +				if (skb_copy_bits(list, offset-start, to, copy))
 33.1151 +					goto fault;
 33.1152 +				if ((len -= copy) == 0)
 33.1153 +					return 0;
 33.1154 +				offset += copy;
 33.1155 +				to += copy;
 33.1156 +			}
 33.1157 +			start = end;
 33.1158 +		}
 33.1159 +	}
 33.1160 +	if (len == 0)
 33.1161 +		return 0;
 33.1162 +
 33.1163 +fault:
 33.1164 +	return -EFAULT;
 33.1165 +}
 33.1166 +
 33.1167 +/* Checksum skb data. */
 33.1168 +
 33.1169 +unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
 33.1170 +{
 33.1171 +	int i, copy;
 33.1172 +	int start = skb->len - skb->data_len;
 33.1173 +	int pos = 0;
 33.1174 +
 33.1175 +	/* Checksum header. */
 33.1176 +	if ((copy = start-offset) > 0) {
 33.1177 +		if (copy > len)
 33.1178 +			copy = len;
 33.1179 +		csum = csum_partial(skb->data+offset, copy, csum);
 33.1180 +		if ((len -= copy) == 0)
 33.1181 +			return csum;
 33.1182 +		offset += copy;
 33.1183 +		pos = copy;
 33.1184 +	}
 33.1185 +
 33.1186 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 33.1187 +		int end;
 33.1188 +
 33.1189 +		BUG_TRAP(start <= offset+len);
 33.1190 +
 33.1191 +		end = start + skb_shinfo(skb)->frags[i].size;
 33.1192 +		if ((copy = end-offset) > 0) {
 33.1193 +			unsigned int csum2;
 33.1194 +			u8 *vaddr;
 33.1195 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 33.1196 +
 33.1197 +			if (copy > len)
 33.1198 +				copy = len;
 33.1199 +			vaddr = kmap_skb_frag(frag);
 33.1200 +			csum2 = csum_partial(vaddr + frag->page_offset +
 33.1201 +					     offset-start, copy, 0);
 33.1202 +			kunmap_skb_frag(vaddr);
 33.1203 +			csum = csum_block_add(csum, csum2, pos);
 33.1204 +			if (!(len -= copy))
 33.1205 +				return csum;
 33.1206 +			offset += copy;
 33.1207 +			pos += copy;
 33.1208 +		}
 33.1209 +		start = end;
 33.1210 +	}
 33.1211 +
 33.1212 +	if (skb_shinfo(skb)->frag_list) {
 33.1213 +		struct sk_buff *list;
 33.1214 +
 33.1215 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 33.1216 +			int end;
 33.1217 +
 33.1218 +			BUG_TRAP(start <= offset+len);
 33.1219 +
 33.1220 +			end = start + list->len;
 33.1221 +			if ((copy = end-offset) > 0) {
 33.1222 +				unsigned int csum2;
 33.1223 +				if (copy > len)
 33.1224 +					copy = len;
 33.1225 +				csum2 = skb_checksum(list, offset-start, copy, 0);
 33.1226 +				csum = csum_block_add(csum, csum2, pos);
 33.1227 +				if ((len -= copy) == 0)
 33.1228 +					return csum;
 33.1229 +				offset += copy;
 33.1230 +				pos += copy;
 33.1231 +			}
 33.1232 +			start = end;
 33.1233 +		}
 33.1234 +	}
 33.1235 +	if (len == 0)
 33.1236 +		return csum;
 33.1237 +
 33.1238 +	BUG();
 33.1239 +	return csum;
 33.1240 +}
 33.1241 +
 33.1242 +/* Both of above in one bottle. */
 33.1243 +
 33.1244 +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
 33.1245 +{
 33.1246 +	int i, copy;
 33.1247 +	int start = skb->len - skb->data_len;
 33.1248 +	int pos = 0;
 33.1249 +
 33.1250 +	/* Copy header. */
 33.1251 +	if ((copy = start-offset) > 0) {
 33.1252 +		if (copy > len)
 33.1253 +			copy = len;
 33.1254 +		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
 33.1255 +		if ((len -= copy) == 0)
 33.1256 +			return csum;
 33.1257 +		offset += copy;
 33.1258 +		to += copy;
 33.1259 +		pos = copy;
 33.1260 +	}
 33.1261 +
 33.1262 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 33.1263 +		int end;
 33.1264 +
 33.1265 +		BUG_TRAP(start <= offset+len);
 33.1266 +
 33.1267 +		end = start + skb_shinfo(skb)->frags[i].size;
 33.1268 +		if ((copy = end-offset) > 0) {
 33.1269 +			unsigned int csum2;
 33.1270 +			u8 *vaddr;
 33.1271 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 33.1272 +
 33.1273 +			if (copy > len)
 33.1274 +				copy = len;
 33.1275 +			vaddr = kmap_skb_frag(frag);
 33.1276 +			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
 33.1277 +						      offset-start, to, copy, 0);
 33.1278 +			kunmap_skb_frag(vaddr);
 33.1279 +			csum = csum_block_add(csum, csum2, pos);
 33.1280 +			if (!(len -= copy))
 33.1281 +				return csum;
 33.1282 +			offset += copy;
 33.1283 +			to += copy;
 33.1284 +			pos += copy;
 33.1285 +		}
 33.1286 +		start = end;
 33.1287 +	}
 33.1288 +
 33.1289 +	if (skb_shinfo(skb)->frag_list) {
 33.1290 +		struct sk_buff *list;
 33.1291 +
 33.1292 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 33.1293 +			unsigned int csum2;
 33.1294 +			int end;
 33.1295 +
 33.1296 +			BUG_TRAP(start <= offset+len);
 33.1297 +
 33.1298 +			end = start + list->len;
 33.1299 +			if ((copy = end-offset) > 0) {
 33.1300 +				if (copy > len)
 33.1301 +					copy = len;
 33.1302 +				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
 33.1303 +				csum = csum_block_add(csum, csum2, pos);
 33.1304 +				if ((len -= copy) == 0)
 33.1305 +					return csum;
 33.1306 +				offset += copy;
 33.1307 +				to += copy;
 33.1308 +				pos += copy;
 33.1309 +			}
 33.1310 +			start = end;
 33.1311 +		}
 33.1312 +	}
 33.1313 +	if (len == 0)
 33.1314 +		return csum;
 33.1315 +
 33.1316 +	BUG();
 33.1317 +	return csum;
 33.1318 +}
 33.1319 +
 33.1320 +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 33.1321 +{
 33.1322 +	unsigned int csum;
 33.1323 +	long csstart;
 33.1324 +
 33.1325 +	if (skb->ip_summed == CHECKSUM_HW)
 33.1326 +		csstart = skb->h.raw - skb->data;
 33.1327 +	else
 33.1328 +		csstart = skb->len - skb->data_len;
 33.1329 +
 33.1330 +	if (csstart > skb->len - skb->data_len)
 33.1331 +		BUG();
 33.1332 +
 33.1333 +	memcpy(to, skb->data, csstart);
 33.1334 +
 33.1335 +	csum = 0;
 33.1336 +	if (csstart != skb->len)
 33.1337 +		csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
 33.1338 +				skb->len-csstart, 0);
 33.1339 +
 33.1340 +	if (skb->ip_summed == CHECKSUM_HW) {
 33.1341 +		long csstuff = csstart + skb->csum;
 33.1342 +
 33.1343 +		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
 33.1344 +	}
 33.1345 +}
 33.1346 +
 33.1347 +#if 0
 33.1348 +/* 
 33.1349 + * 	Tune the memory allocator for a new MTU size.
 33.1350 + */
 33.1351 +void skb_add_mtu(int mtu)
 33.1352 +{
 33.1353 +	/* Must match allocation in alloc_skb */
 33.1354 +	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
 33.1355 +
 33.1356 +	kmem_add_cache_size(mtu);
 33.1357 +}
 33.1358 +#endif
 33.1359 +
 33.1360 +void __init skb_init(void)
 33.1361 +{
 33.1362 +	int i;
 33.1363 +
 33.1364 +	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
 33.1365 +					      sizeof(struct sk_buff),
 33.1366 +					      0,
 33.1367 +					      SLAB_HWCACHE_ALIGN,
 33.1368 +					      skb_headerinit, NULL);
 33.1369 +	if (!skbuff_head_cache)
 33.1370 +		panic("cannot create skbuff cache");
 33.1371 +
 33.1372 +        //init_net_pages(NUM_NET_PAGES);
 33.1373 +
 33.1374 +	for (i=0; i<NR_CPUS; i++)
 33.1375 +		skb_queue_head_init(&skb_head_pool[i].list);
 33.1376 +}