ia64/xen-unstable

changeset 121:82679de8a1ca

bitkeeper revision 1.15.1.13 (3e41144dWc5GH88F3idrXT41kpovhQ)

Zero copy RX path is working with guest-allocated page pool.
author akw27@boulderdash.cl.cam.ac.uk
date Wed Feb 05 13:40:29 2003 +0000 (2003-02-05)
parents cb2688ed1a23
children b1b1608f5d5c
files xen-2.4.16/common/domain.c xen-2.4.16/common/event.c xen-2.4.16/common/network.c xen-2.4.16/include/hypervisor-ifs/network.h xen-2.4.16/include/xeno/vif.h xen-2.4.16/net/dev.c xen-2.4.16/net/skbuff.c xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c xenolinux-2.4.16-sparse/include/asm-xeno/io.h xenolinux-2.4.16-sparse/net/core/skbuff.c
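
In outline: the guest allocates RX buffers from its own page pool and posts, in
each RX descriptor, the machine address of the page-table entry (ppte) that maps
the buffer page. do_net_update() copies each descriptor into a hypervisor-private
shadow ring, validates the page, and marks the guest pte not-present. When a
packet arrives, deliver_packet() flips pages rather than copying data: the frame
the packet landed in is handed to the guest by rewriting that pte, and the
guest's original frame is kept to back a future skb. A condensed sketch of the
flip (names as in the xen-2.4.16/net/dev.c hunks below; the pfn_info bookkeeping
and error handling are elided, so this is illustrative, not the committed code):

    static void flip_rx_page(struct sk_buff *skb, rx_shadow_entry_t *rx)
    {
        /* rx->addr holds the machine address of the guest's pte. */
        unsigned long *g_pte = map_domain_mem(rx->addr);
        struct pfn_info *g_pfn = frame_table + (*g_pte >> PAGE_SHIFT);
        struct pfn_info *h_pfn = skb->pf;   /* frame holding the packet data */

        /* ... exchange next/prev/flags and reference counts ... */

        /* Point the guest pte at the data frame and make it present again. */
        *g_pte = (*g_pte & ~PAGE_MASK)
               | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK)
               | _PAGE_PRESENT;
        unmap_domain_mem(g_pte);
        skb->pf = g_pfn;   /* guest's old frame is returned to the free pool */
    }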
line diff
     1.1 --- a/xen-2.4.16/common/domain.c	Sat Feb 01 12:06:32 2003 +0000
     1.2 +++ b/xen-2.4.16/common/domain.c	Wed Feb 05 13:40:29 2003 +0000
     1.3 @@ -402,8 +402,8 @@ extern module_t *mod;
     1.4  extern unsigned char *cmdline;
     1.5  int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
     1.6  {
     1.7 -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
     1.8 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
     1.9 +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
    1.10 +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
    1.11  #define ALLOC_FRAME_FROM_DOMAIN() (alloc_address -= PAGE_SIZE)
    1.12      char *src, *dst;
    1.13      int i, dom = p->domain;
     2.1 --- a/xen-2.4.16/common/event.c	Sat Feb 01 12:06:32 2003 +0000
     2.2 +++ b/xen-2.4.16/common/event.c	Wed Feb 05 13:40:29 2003 +0000
     2.3 @@ -14,13 +14,13 @@
     2.4  typedef void (*hyp_event_callback_fn_t)(void);
     2.5  
     2.6  extern void schedule(void);
     2.7 -extern void flush_rx_queue(void);
     2.8 +extern void update_shared_ring(void);
     2.9  
    2.10  /* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */
    2.11  static hyp_event_callback_fn_t event_call_fn[] = 
    2.12  {
    2.13      schedule,
    2.14 -    flush_rx_queue,
    2.15 +    update_shared_ring,
    2.16      kill_domain
    2.17  };
    2.18  
     3.1 --- a/xen-2.4.16/common/network.c	Sat Feb 01 12:06:32 2003 +0000
     3.2 +++ b/xen-2.4.16/common/network.c	Wed Feb 05 13:40:29 2003 +0000
     3.3 @@ -78,7 +78,7 @@ net_vif_t *create_net_vif(int domain)
     3.4      if ((shadow_ring->tx_ring == NULL) || (shadow_ring->rx_ring == NULL))
     3.5              goto fail;
     3.6  
     3.7 -    shadow_ring->rx_prod = 0;
     3.8 +    shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0;
     3.9      
    3.10      // fill in the new vif struct.
    3.11      
     4.1 --- a/xen-2.4.16/include/hypervisor-ifs/network.h	Sat Feb 01 12:06:32 2003 +0000
     4.2 +++ b/xen-2.4.16/include/hypervisor-ifs/network.h	Wed Feb 05 13:40:29 2003 +0000
     4.3 @@ -119,5 +119,6 @@ int add_net_rule(net_rule_t *rule);
     4.4  
     4.5  #define RING_STATUS_OK               0  // Everything is gravy.
     4.6  #define RING_STATUS_ERR_CFU         -1  // Copy from user problems.
     4.7 +#define RING_STATUS_BAD_PAGE        -2  // What they gave us was pure evil.
     4.8  
     4.9  #endif
     5.1 --- a/xen-2.4.16/include/xeno/vif.h	Sat Feb 01 12:06:32 2003 +0000
     5.2 +++ b/xen-2.4.16/include/xeno/vif.h	Wed Feb 05 13:40:29 2003 +0000
     5.3 @@ -42,7 +42,7 @@ typedef struct rx_shadow_entry_st {
     5.4  typedef struct net_shadow_ring_st {
     5.5      tx_shadow_entry_t *tx_ring;
     5.6      rx_shadow_entry_t *rx_ring;
     5.7 -    unsigned int rx_prod;  // trying to add shadow pointers only as I need to.
     5.8 +    unsigned int rx_prod, rx_cons, rx_idx;
     5.9  } net_shadow_ring_t;
    5.10  
    5.11  typedef struct net_vif_st {
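
The shadow ring now carries three cursors. Reading the net/dev.c changes below,
their roles appear to be (this division of labour is inferred, not documented):

    /* rx_prod - next slot do_net_update() fills from the guest's ring       */
    /* rx_cons - next OK slot deliver_packet() puts packet data into         */
    /* rx_idx  - next finished slot update_shared_ring() copies back to the  */
    /*           guest; so rx_idx <= rx_cons <= rx_prod, modulo ring size.   */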
     6.1 --- a/xen-2.4.16/net/dev.c	Sat Feb 01 12:06:32 2003 +0000
     6.2 +++ b/xen-2.4.16/net/dev.c	Wed Feb 05 13:40:29 2003 +0000
     6.3 @@ -31,6 +31,7 @@
     6.4  
     6.5  #include <linux/event.h>
     6.6  #include <asm/domain_page.h>
     6.7 +#include <asm/pgalloc.h>
     6.8  
     6.9  #define BUG_TRAP ASSERT
    6.10  #define notifier_call_chain(_a,_b,_c) ((void)0)
    6.11 @@ -39,6 +40,12 @@
    6.12  #define rtnl_unlock() ((void)0)
    6.13  #define dst_init() ((void)0)
    6.14  
    6.15 +// Ring defines:
    6.16 +#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
    6.17 +#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
    6.18 +#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
    6.19 +#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
    6.20 +
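
(These macros assume the ring sizes are powers of two, so a bitwise AND can
replace a modulo. For instance, with a hypothetical RX_RING_SIZE of 256:

    unsigned int i = 255;
    i = RX_RING_INC(i);      /* (255+1) & 255 == 0  : wraps to the start */
    i = RX_RING_ADD(i, 300); /* (0+300) & 255 == 44 : also wraps         */

A ring size that is not a power of two would silently corrupt the index.)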
    6.21  struct net_device *the_dev = NULL;
    6.22  
    6.23  /*
    6.24 @@ -48,11 +55,11 @@ struct net_device *the_dev = NULL;
    6.25  struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
    6.26  
    6.27  
    6.28 -/*****************************************************************************************
    6.29 +/*********************************************************************************
    6.30  
    6.31  			    Device Interface Subroutines
    6.32  
    6.33 -******************************************************************************************/
    6.34 +**********************************************************************************/
    6.35  
    6.36  /**
    6.37   *	__dev_get_by_name	- find a device by its name 
    6.38 @@ -662,7 +669,83 @@ static void get_sample_stats(int cpu)
    6.39  	softnet_data[cpu].avg_blog = avg_blog;
    6.40  }
    6.41  
    6.42 +void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
    6.43 +{
    6.44 +        net_shadow_ring_t *shadow_ring;
    6.45 +        rx_shadow_entry_t *rx;
    6.46 +        unsigned long *g_pte, tmp;
    6.47 +        struct pfn_info *g_pfn, *h_pfn;
    6.48 +        unsigned int i; //, nvif;
    6.49  
    6.50 +        if (skb->skb_type != SKB_ZERO_COPY) 
    6.51 +            return;
    6.52 +        
    6.53 +        /*
    6.54 +         * Write the virtual MAC address into the destination field
    6.55 +         * of the ethernet packet. Furthermore, do the same for ARP
    6.56 +         * reply packets. This is easy because the virtual MAC address
    6.57 +         * is always 00-[nn]-00-00-00-00, where the second sixteen bits 
    6.58 +         * of the MAC are the vif's id.  This is to differentiate between
    6.59 +         * vifs on guests that have more than one.
    6.60 +         *
    6.61 +         * In zero copy, the data pointers for the packet have to have been 
    6.62 +         * mapped in by the caller.
    6.63 +         */
    6.64 +
    6.65 +        memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
    6.66 +//        *(unsigned int *)(skb->mac.ethernet->h_dest + 1) = nvif;
    6.67 +        if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
    6.68 +        {
    6.69 +            memset(skb->nh.raw + 18, 0, ETH_ALEN);
    6.70 +//            *(unsigned int *)(skb->nh.raw + 18 + 1) = nvif;
    6.71 +        }
    6.72 +        shadow_ring = vif->shadow_ring;
    6.73 +
    6.74 +        //Advance to next good buffer.
    6.75 +        for (i = shadow_ring->rx_cons; 
    6.76 +             (i != shadow_ring->rx_prod) 
    6.77 +             && ( shadow_ring->rx_ring[i].status != RING_STATUS_OK );
    6.78 +             i = RX_RING_INC(i));
    6.79 +            
    6.80 +        if (( i != shadow_ring->rx_prod ) &&
    6.81 +            ( shadow_ring->rx_ring[i].status == RING_STATUS_OK ))
    6.82 +        {
    6.83 +            rx = shadow_ring->rx_ring+i;
    6.84 +            if ( (skb->len + ETH_HLEN) < rx->size )
    6.85 +                rx->size = skb->len + ETH_HLEN;
    6.86 +                        
    6.87 +            if (rx->flush_count == tlb_flush_count[smp_processor_id()])
    6.88 +                flush_tlb_all();
    6.89 +            
    6.90 +            g_pte = map_domain_mem(rx->addr);
    6.91 +
    6.92 +            g_pfn =  frame_table + (*g_pte >> PAGE_SHIFT);
    6.93 +            h_pfn = skb->pf;
    6.94 +
    6.95 +            //flip and/or set relevant pf_info fields.
    6.96 +            tmp = g_pfn->next; g_pfn->next = h_pfn->next; h_pfn->next = tmp;
    6.97 +            tmp = g_pfn->prev; g_pfn->prev = h_pfn->prev; h_pfn->prev = tmp;
    6.98 +            tmp = g_pfn->flags; g_pfn->flags = h_pfn->flags; h_pfn->flags = tmp;
    6.99 +            h_pfn->tot_count = 1;
   6.100 +            h_pfn->type_count = g_pfn->type_count;
   6.101 +            g_pfn->tot_count = g_pfn->type_count = 0;
   6.102 +            h_pfn->flags = current->domain | PGT_l1_page_table;
   6.103 +            g_pfn->flags = PGT_l1_page_table;
   6.104 +            //point guest pte at the new page:
   6.105 +            *g_pte = (*g_pte & ~PAGE_MASK) 
   6.106 +                | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
   6.107 +            *g_pte |= _PAGE_PRESENT;
   6.108 +                
   6.109 +            unmap_domain_mem(g_pte);
   6.110 +            skb->pf = g_pfn; // return the guest pfn to be put on the free list
   6.111 +                
   6.112 +            shadow_ring->rx_cons = RX_RING_INC(i);
   6.113 +        }
   6.114 +}
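
Note that deliver_packet() drops the packet silently when no RING_STATUS_OK
descriptor lies between rx_cons and rx_prod, and rx_cons only advances when a
flip actually happens. The scan, restated as straight-line code (an equivalent
sketch of the for-loop above):

    unsigned int i = shadow_ring->rx_cons;
    while (i != shadow_ring->rx_prod &&
           shadow_ring->rx_ring[i].status != RING_STATUS_OK)
        i = RX_RING_INC(i);
    if (i == shadow_ring->rx_prod)
        return;    /* no usable guest buffer: the packet is dropped */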
   6.115 +
   6.116 +/* Deliver skb to an old protocol, which is not threaded well
   6.117 +   or which do not understand shared skbs.
   6.118 + */
   6.119  /**
   6.120   *	netif_rx	-	post buffer to the network code
   6.121   *	@skb: buffer to post
   6.122 @@ -687,12 +770,15 @@ int netif_rx(struct sk_buff *skb)
   6.123  #ifdef CONFIG_SMP
   6.124          unsigned long cpu_mask;
   6.125  #endif
   6.126 +        
   6.127          struct task_struct *p;
   6.128  	int this_cpu = smp_processor_id();
   6.129  	struct softnet_data *queue;
   6.130  	unsigned long flags;
   6.131          net_vif_t *vif;
   6.132  
   6.133 +	local_irq_save(flags);
   6.134 +        
   6.135  	if (skb->stamp.tv_sec == 0)
   6.136  		get_fast_time(&skb->stamp);
   6.137  
   6.138 @@ -709,14 +795,13 @@ int netif_rx(struct sk_buff *skb)
   6.139                  skb_reserve(skb,16); // need to ensure that all the drivers and not just tulip do this.
   6.140                  skb->mac.raw = skb->data;
   6.141                  skb->data += ETH_HLEN;
   6.142 +                skb->nh.raw = skb->data;
   6.143          }
   6.144          
   6.145  	/* The code is rearranged so that the path is the most
   6.146  	   short when CPU is congested, but is still operating.
   6.147  	 */
   6.148  	queue = &softnet_data[this_cpu];
   6.149 -
   6.150 -	local_irq_save(flags);
   6.151          
   6.152  	netdev_rx_stat[this_cpu].total++;
   6.153  
   6.154 @@ -749,7 +834,7 @@ int netif_rx(struct sk_buff *skb)
   6.155              do {
   6.156                  if ( p->domain != vif->domain ) continue;
   6.157                  if ( vif->skb_list.qlen > 100 ) break;
   6.158 -                skb_queue_tail(&vif->skb_list, skb);
   6.159 +                deliver_packet(skb, vif);
   6.160                  cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX);
   6.161                  read_unlock(&tasklist_lock);
   6.162                  goto found;
   6.163 @@ -761,28 +846,24 @@ int netif_rx(struct sk_buff *skb)
   6.164  
   6.165  drop:
   6.166  	netdev_rx_stat[this_cpu].dropped++;
   6.167 -	local_irq_restore(flags);
   6.168 -
   6.169          if (skb->skb_type == SKB_ZERO_COPY)
   6.170                  unmap_domain_mem(skb->head);
   6.171 -        
   6.172  	kfree_skb(skb);
   6.173 +        local_irq_restore(flags);
   6.174  	return NET_RX_DROP;
   6.175  
   6.176  found:
   6.177          if (skb->skb_type == SKB_ZERO_COPY) {
   6.178                  unmap_domain_mem(skb->head);
   6.179 -                //skb->head = (u8 *)((skb->pf - frame_table) << PAGE_SHIFT);
   6.180                  skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
   6.181          }
   6.182 +        kfree_skb(skb);
   6.183          hyp_event_notify(cpu_mask);
   6.184          local_irq_restore(flags);
   6.185          return 0;
   6.186  }
   6.187  
   6.188 -/* Deliver skb to an old protocol, which is not threaded well
   6.189 -   or which do not understand shared skbs.
   6.190 - */
   6.191 +
   6.192  static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
   6.193  {
   6.194  	static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
   6.195 @@ -897,7 +978,35 @@ static inline void handle_diverter(struc
   6.196  }
   6.197  #endif   /* CONFIG_NET_DIVERT */
   6.198  
   6.199 +void update_shared_ring(void)
   6.200 +{
   6.201 +    rx_shadow_entry_t *rx;
   6.202 +    shared_info_t *s = current->shared_info;
   6.203 +    net_ring_t *net_ring;
   6.204 +    net_shadow_ring_t *shadow_ring;
   6.205 +    unsigned int nvif;
   6.206  
   6.207 +    clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
   6.208 +    for (nvif = 0; nvif < current->num_net_vifs; nvif++)
   6.209 +    {
   6.210 +        net_ring = current->net_vif_list[nvif]->net_ring;
   6.211 +        shadow_ring = current->net_vif_list[nvif]->shadow_ring;
   6.212 +        while ((shadow_ring->rx_idx != shadow_ring->rx_cons) 
   6.213 +                && (net_ring->rx_cons != net_ring->rx_prod))
   6.214 +        {
   6.215 +            rx = shadow_ring->rx_ring+shadow_ring->rx_idx;
   6.216 +            copy_to_user(net_ring->rx_ring + net_ring->rx_cons, rx, sizeof(rx_entry_t));
   6.217 +
   6.218 +            shadow_ring->rx_idx = RX_RING_INC(shadow_ring->rx_idx);
   6.219 +            net_ring->rx_cons   = RX_RING_INC(net_ring->rx_cons);
   6.220 +
   6.221 +            if ( net_ring->rx_cons == net_ring->rx_event )
   6.222 +                set_bit(_EVENT_NET_RX_FOR_VIF(nvif), &s->events);
   6.223 +            
   6.224 +        }
   6.225 +    }
   6.226 +}
   6.227 +            
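
update_shared_ring() is the batched completion half: running from the hypervisor
event callback, it copies every finished shadow entry back into the guest-visible
ring and raises the per-vif event once the consumer cursor reaches the guest's
rx_event mark. The guest presumably arms that mark to ask for an interrupt after
the next completion, along the lines of (illustrative only; the frontend's event
arming is not part of this diff):

    np->net_ring->rx_event = RX_RING_INC(np->net_ring->rx_cons);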
   6.228  void flush_rx_queue(void)
   6.229  {
   6.230      struct sk_buff *skb;
   6.231 @@ -906,6 +1015,8 @@ void flush_rx_queue(void)
   6.232      net_shadow_ring_t *shadow_ring;
   6.233      unsigned int i, nvif;
   6.234      rx_shadow_entry_t *rx;
   6.235 +    unsigned long *g_pte, tmp;
   6.236 +    struct pfn_info *g_pfn, *h_pfn;
   6.237      
   6.238      /* I have changed this to batch flush all vifs for a guest
   6.239       * at once, whenever this is called.  Since the guest is about to be
   6.240 @@ -918,7 +1029,6 @@ void flush_rx_queue(void)
   6.241       * loop can be replaced with a translation to the specific NET 
   6.242       * interrupt to serve. --akw
   6.243       */
   6.244 -    
   6.245      clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
   6.246  
   6.247      for (nvif = 0; nvif < current->num_net_vifs; nvif++)
   6.248 @@ -928,6 +1038,8 @@ void flush_rx_queue(void)
   6.249          while ( (skb = skb_dequeue(&current->net_vif_list[nvif]->skb_list)) 
   6.250                          != NULL )
   6.251          {
   6.252 +            //temporary hack to stop processing non-zc skbs.
   6.253 +            if (skb->skb_type == SKB_NORMAL) continue;
   6.254              /*
   6.255               * Write the virtual MAC address into the destination field
   6.256               * of the ethernet packet. Furthermore, do the same for ARP
   6.257 @@ -938,6 +1050,16 @@ void flush_rx_queue(void)
   6.258               * second sixteen bits, which are the per-host vif id.
   6.259               * (so eth0 should be 00-00-..., eth1 is 00-01-...)
   6.260               */
   6.261 +            
   6.262 +            if (skb->skb_type == SKB_ZERO_COPY)
   6.263 +            {
   6.264 +                skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
   6.265 +                skb->data = skb->head;
   6.266 +                skb_reserve(skb,16); 
   6.267 +                skb->mac.raw = skb->data;
   6.268 +                skb->data += ETH_HLEN;
   6.269 +            }
   6.270 +            
   6.271              memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
   6.272              *(unsigned int *)(skb->mac.ethernet->h_dest + 1) = nvif;
   6.273              if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
   6.274 @@ -946,9 +1068,15 @@ void flush_rx_queue(void)
   6.275                  *(unsigned int *)(skb->nh.raw + 18 + 1) = nvif;
   6.276              }
   6.277  
   6.278 +            if (skb->skb_type == SKB_ZERO_COPY)
   6.279 +            {
   6.280 +                unmap_domain_mem(skb->head);
   6.281 +            }
   6.282 +
   6.283              i = net_ring->rx_cons;
   6.284              if ( i != net_ring->rx_prod )
   6.285              {
   6.286 +                net_ring->rx_ring[i].status = shadow_ring->rx_ring[i].status;
   6.287                  if ( shadow_ring->rx_ring[i].status == RING_STATUS_OK)
   6.288                  {
   6.289                      rx = shadow_ring->rx_ring+i;
   6.290 @@ -959,7 +1087,7 @@ void flush_rx_queue(void)
   6.291                       * replaced with a page swizzle.
   6.292                       */
   6.293  
   6.294 -                    if (skb->skb_type == SKB_ZERO_COPY)
   6.295 +                    /*if (skb->skb_type == SKB_ZERO_COPY)
   6.296                      {
   6.297                          skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
   6.298                          skb->data = skb->head;
   6.299 @@ -975,7 +1103,49 @@ void flush_rx_queue(void)
   6.300                      {
   6.301                          unmap_domain_mem(skb->head);
   6.302                          skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
   6.303 +                    }*/
   6.304 +
   6.305 +                    //presumably I don't need to rewalk the guest page table
   6.306 +                    //here.
   6.307 +                    if (skb->skb_type == SKB_ZERO_COPY) 
   6.308 +                    {
   6.309 +                        // g_pfn is the frame FROM the guest being given up
   6.310 +                        // h_pfn is the frame FROM the hypervisor, passing up.
   6.311 +                        
   6.312 +                        if (rx->flush_count == tlb_flush_count[smp_processor_id()])
   6.313 +                        {
   6.314 +                            flush_tlb_all();
   6.315 +                        }
   6.316 +                        
   6.317 +                        g_pte = map_domain_mem(rx->addr);
   6.318 +                        
   6.319 +                        //g_pfn = frame_table + (rx->addr >> PAGE_SHIFT);
   6.320 +                        g_pfn =  frame_table + (*g_pte >> PAGE_SHIFT);
   6.321 +                        h_pfn = skb->pf;
   6.322 +
   6.323 +
   6.324 +                        tmp = g_pfn->next; g_pfn->next = h_pfn->next; h_pfn->next = tmp;
   6.325 +                        tmp = g_pfn->prev; g_pfn->prev = h_pfn->prev; h_pfn->prev = tmp;
   6.326 +                        tmp = g_pfn->flags; g_pfn->flags = h_pfn->flags; h_pfn->flags = tmp;
   6.327 +                        
   6.328 +                        h_pfn->tot_count = 1;
   6.329 +                        h_pfn->type_count = g_pfn->type_count;
   6.330 +                        g_pfn->tot_count = g_pfn->type_count = 0;
   6.331 +                        
   6.332 +                        h_pfn->flags = current->domain | PGT_l1_page_table;
   6.333 +                        g_pfn->flags = PGT_l1_page_table;
   6.334 +
   6.335 +
   6.336 +                        *g_pte = (*g_pte & ~PAGE_MASK) | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
   6.337 +
   6.338 +                        *g_pte |= _PAGE_PRESENT;
   6.339 +                        unmap_domain_mem(g_pte);
   6.340 +
   6.341 +                        skb->pf = g_pfn; // return the guest pfn to be put on the free list
   6.342 +                    } else {
   6.343 +                        BUG(); //got a non-zero copy skb.  which is not good.
   6.344                      }
   6.345 +                    
   6.346                  }
   6.347                  net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1);
   6.348                  if ( net_ring->rx_cons == net_ring->rx_event )
   6.349 @@ -1963,10 +2133,7 @@ int __init net_dev_init(void)
   6.350   * Called from guest OS to notify updates to its transmit and/or receive
   6.351   * descriptor rings.
   6.352   */
   6.353 -#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
   6.354 -#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
   6.355 -#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
   6.356 -#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
   6.357 +
   6.358  long do_net_update(void)
   6.359  {
   6.360      shared_info_t *shared = current->shared_info;    
   6.361 @@ -1976,7 +2143,12 @@ long do_net_update(void)
   6.362      unsigned int i, j;
   6.363      struct sk_buff *skb;
   6.364      tx_entry_t tx;
   6.365 -
   6.366 +    rx_shadow_entry_t *rx;
   6.367 +    unsigned long pfn;
   6.368 +    struct pfn_info *page;
   6.369 +    unsigned long *g_pte;
   6.370 +    
   6.371 +    
   6.372      for ( j = 0; j < current->num_net_vifs; j++)
   6.373      {
   6.374          current_vif = current->net_vif_list[j];
   6.375 @@ -2034,6 +2206,7 @@ long do_net_update(void)
   6.376                  net_get_target_vif(skb);
   6.377                  if ( skb->dst_vif > VIF_PHYSICAL_INTERFACE )
   6.378                  {
   6.379 +printk("LOCAL DELIVERY!\n");
   6.380                      (void)netif_rx(skb);
   6.381                  }
   6.382                  else if ( skb->dst_vif == VIF_PHYSICAL_INTERFACE )
   6.383 @@ -2051,32 +2224,48 @@ long do_net_update(void)
   6.384          net_ring->tx_cons = i;
   6.385  
   6.386          /* Next, pull any new RX descriptors across to the shadow ring.
   6.387 -         * Note that in the next revision, these will reference PTEs and the
   6.388 -         * code here will have to validate reference and flush counts, copy the 
   6.389 -         * descriptor, change the ownership to dom0 and invalidate the client's
   6.390 -         * version of the page.
   6.391           */
   6.392      
   6.393          shadow_ring = current_vif->shadow_ring;
   6.394  
   6.395 -        for (i = shadow_ring->rx_prod; i != net_ring->rx_prod; i = TX_RING_INC(i))
   6.396 +        for (i = shadow_ring->rx_prod; i != net_ring->rx_prod; i = RX_RING_INC(i))
   6.397          {
   6.398 -            /* This copy assumes that rx_shadow_entry_t is an extension of rx_net_entry_t
   6.399 -             * extra fields must be tacked on to the end.
   6.400 +            /* This copy assumes that rx_shadow_entry_t is an extension of 
   6.401 +             * rx_net_entry_t extra fields must be tacked on to the end.
   6.402               */
   6.403 -            
   6.404              if ( copy_from_user( shadow_ring->rx_ring+i, net_ring->rx_ring+i, 
   6.405                                   sizeof (rx_entry_t) ) )
   6.406              {
   6.407                  shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU;
   6.408 +                continue;
   6.409              } else {
   6.410 +                    
   6.411 +                rx = shadow_ring->rx_ring + i;
   6.412 +                pfn = rx->addr >> PAGE_SHIFT;
   6.413 +                page = frame_table + pfn;
   6.414 +                
   6.415                  shadow_ring->rx_ring[i].status = RING_STATUS_OK;
   6.416 +
    6.417 +                if (!(page->flags & PGT_l1_page_table) 
    6.418 +                    || !((page->flags & PG_domain_mask) == current->domain))
    6.419 +                        shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
   6.420 +
   6.421 +
   6.422 +                g_pte = map_domain_mem(rx->addr);
   6.423 +
   6.424 +                if (!(*g_pte & _PAGE_PRESENT))
   6.425 +                        shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
   6.426 +                page = (*g_pte >> PAGE_SHIFT) + frame_table;
   6.427 +                if (page->tot_count != 1) 
   6.428 +                        shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
   6.429 +                
   6.430 +                *g_pte &= ~_PAGE_PRESENT;
   6.431 +                rx->flush_count = tlb_flush_count[smp_processor_id()];
   6.432 +                unmap_domain_mem(g_pte);
   6.433              }
   6.434          }
   6.435 -
   6.436          shadow_ring->rx_prod = net_ring->rx_prod;
   6.437      }
   6.438 -
   6.439      return 0;
   6.440  }
   6.441  
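
The RX half of do_net_update() above encodes the safety argument for the whole
scheme: a guest may only post a buffer whose pte lives in one of its own L1
page-table pages, is present, and maps a frame referenced exactly once; the pte
is then cleared of _PAGE_PRESENT so the guest cannot touch the frame while the
hypervisor owns it. The checks, restated as a predicate (a sketch; the committed
code sets the status inline and continues even after marking a descriptor bad):

    static int rx_buffer_ok(unsigned long *g_pte, struct pfn_info *pt_page,
                            int dom)
    {
        if (!(pt_page->flags & PGT_l1_page_table) ||
            (pt_page->flags & PG_domain_mask) != dom)
            return RING_STATUS_BAD_PAGE;  /* pte not in this domain's L1 table */
        if (!(*g_pte & _PAGE_PRESENT))
            return RING_STATUS_BAD_PAGE;  /* buffer page not mapped            */
        if ((frame_table + (*g_pte >> PAGE_SHIFT))->tot_count != 1)
            return RING_STATUS_BAD_PAGE;  /* frame is mapped more than once    */
        return RING_STATUS_OK;
    }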
     7.1 --- a/xen-2.4.16/net/skbuff.c	Sat Feb 01 12:06:32 2003 +0000
     7.2 +++ b/xen-2.4.16/net/skbuff.c	Wed Feb 05 13:40:29 2003 +0000
     7.3 @@ -180,8 +180,8 @@ static inline void dealloc_skb_data_page
     7.4          pf = skb->pf;
     7.5  
     7.6          spin_lock_irqsave(&free_list_lock, flags);
     7.7 -
     7.8 -        list_add_tail(&pf->list, &free_list);
     7.9 +        
    7.10 +        list_add(&pf->list, &free_list);
    7.11          free_pfns++;
    7.12  
    7.13          spin_unlock_irqrestore(&free_list_lock, flags);
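
Switching list_add_tail() to list_add() turns page reuse from FIFO into LIFO:
the frame freed most recently, which is still warm in the cache, is the first
one handed out again. This assumes the allocator pops from the list head,
roughly (an assumption about alloc_skb_data_page(), which this hunk does not
show):

    pf = list_entry(free_list.next, struct pfn_info, list);  /* pop the head */
    list_del(&pf->list);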
    7.14 @@ -213,6 +213,7 @@ struct sk_buff *alloc_zc_skb(unsigned in
    7.15          /* Get the DATA. Size must match skb_add_mtu(). */
    7.16          size = SKB_DATA_ALIGN(size);
    7.17          data = alloc_skb_data_page(skb);
    7.18 +
    7.19          if (data == NULL)
    7.20                  goto nodata;
    7.21  
    7.22 @@ -237,6 +238,7 @@ struct sk_buff *alloc_zc_skb(unsigned in
    7.23          atomic_set(&(skb_shinfo(skb)->dataref), 1);
    7.24          skb_shinfo(skb)->nr_frags = 0;
    7.25          skb_shinfo(skb)->frag_list = NULL;
    7.26 +
    7.27          return skb;
    7.28  
    7.29  nodata:
    7.30 @@ -381,6 +383,7 @@ static void skb_clone_fraglist(struct sk
    7.31  
    7.32  static void skb_release_data(struct sk_buff *skb)
    7.33  {
    7.34 +
    7.35  	if (!skb->cloned ||
    7.36  	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
    7.37  		if (skb_shinfo(skb)->nr_frags) {
    7.38 @@ -394,10 +397,9 @@ static void skb_release_data(struct sk_b
    7.39  
    7.40                  if (skb->skb_type == SKB_NORMAL) {
    7.41  		    kfree(skb->head);
    7.42 -                } else if (skb->skb_type == SKB_ZERO_COPY) {
    7.43 -                    dealloc_skb_data_page(skb);
     7.44 +                } else if (skb->skb_type == SKB_ZERO_COPY) {
          +                    dealloc_skb_data_page(skb);
    7.45                  } else {
    7.46 -                    printk("skb_release_data called with unknown skb type!\n");
    7.47 +                    BUG(); //skb_release_data called with unknown skb type!
    7.48                  }
    7.49  	}
    7.50  }
    7.51 @@ -436,6 +438,7 @@ void __kfree_skb(struct sk_buff *skb)
    7.52  		}
    7.53  		skb->destructor(skb);
    7.54  	}
    7.55 +
    7.56  #ifdef CONFIG_NETFILTER
    7.57  	nf_conntrack_put(skb->nfct);
    7.58  #endif
     8.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c	Sat Feb 01 12:06:32 2003 +0000
     8.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c	Wed Feb 05 13:40:29 2003 +0000
     8.3 @@ -192,9 +192,9 @@ static void network_alloc_rx_buffers(str
     8.4          skb = dev_alloc_skb(RX_BUF_SIZE);
     8.5          if ( skb == NULL ) break;
     8.6          skb->dev = dev;
     8.7 -        skb_reserve(skb, 2); /* word align the IP header */
     8.8 +        //skb_reserve(skb, 2); /* word align the IP header */
     8.9          np->rx_skb_ring[i] = skb;
    8.10 -        np->net_ring->rx_ring[i].addr = (unsigned long)skb->data;
    8.11 +        np->net_ring->rx_ring[i].addr = (unsigned long)skb->net_page->ppte; //data;
    8.12          np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */
    8.13      }
    8.14  
    8.15 @@ -276,10 +276,18 @@ static void network_rx_int(int irq, void
    8.16   again:
    8.17      for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) )
    8.18      {
    8.19 +        if (np->net_ring->rx_ring[i].status != RING_STATUS_OK)
    8.20 +        {
    8.21 +                printk("bad buffer on RX ring!(%d)\n", 
    8.22 +                                np->net_ring->rx_ring[i].status);
    8.23 +                continue;
    8.24 +        }
    8.25          skb = np->rx_skb_ring[i];
    8.26 +        
    8.27          skb_put(skb, np->net_ring->rx_ring[i].size);
    8.28          skb->protocol = eth_type_trans(skb, dev);
    8.29          np->stats.rx_packets++;
    8.30 +
    8.31          np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
    8.32          netif_rx(skb);
    8.33          dev->last_rx = jiffies;
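
(One caveat in the hunk above: on a bad status the loop continues without
unhooking or freeing np->rx_skb_ring[i], so that skb is effectively leaked;
this reads as a temporary simplification while the zero-copy path stabilises
rather than a finished error path.)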
     9.1 --- a/xenolinux-2.4.16-sparse/include/asm-xeno/io.h	Sat Feb 01 12:06:32 2003 +0000
     9.2 +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/io.h	Wed Feb 05 13:40:29 2003 +0000
     9.3 @@ -2,7 +2,7 @@
     9.4  #define _ASM_IO_H
     9.5  
     9.6  #include <linux/config.h>
     9.7 -
     9.8 +#include <asm/hypervisor.h>
     9.9  /*
    9.10   * This file contains the definitions for the x86 IO instructions
    9.11   * inb/inw/inl/outb/outw/outl and the "string versions" of the same
    9.12 @@ -74,6 +74,22 @@ static inline void * phys_to_virt(unsign
    9.13  }
    9.14  
    9.15  /*
    9.16 + * Change virtual addresses to machine addresses and vv.
    9.17 + * These are equally trivial.
    9.18 + */
    9.19 +
    9.20 +static inline unsigned long virt_to_mach(volatile void * address)
    9.21 +{
    9.22 +       return __pa(address) + (unsigned long) start_info.phys_base;
    9.23 +}
    9.24 +
    9.25 +static inline void *mach_to_virt(unsigned long address)
    9.26 +{
    9.27 +        return __va(address) - (unsigned long) start_info.phys_base;
    9.28 +}
    9.29 +
    9.30 +
    9.31 +/*
    9.32   * Change "struct page" to physical address.
    9.33   */
    9.34  #define page_to_phys(page)	((page - mem_map) << PAGE_SHIFT)
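
virt_to_mach() and mach_to_virt() convert between the guest's pseudo-physical
addresses and real machine addresses using the phys_base handed over in
start_info; ring descriptors must carry machine addresses. Typical use, as in
the xenolinux skbuff.c change below:

    pte_t *ptep = pte_offset(pmd, np->virt_addr);
    np->ppte = (unsigned long)virt_to_mach(ptep);  /* machine addr of the pte */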
    10.1 --- a/xenolinux-2.4.16-sparse/net/core/skbuff.c	Sat Feb 01 12:06:32 2003 +0000
    10.2 +++ b/xenolinux-2.4.16-sparse/net/core/skbuff.c	Wed Feb 05 13:40:29 2003 +0000
    10.3 @@ -59,7 +59,7 @@
    10.4  #include <net/tcp.h>
    10.5  #include <net/udp.h>
    10.6  #include <net/sock.h>
    10.7 -
    10.8 +#include <asm/io.h>
    10.9  #include <asm/uaccess.h>
   10.10  #include <asm/system.h>
   10.11  
   10.12 @@ -246,19 +246,17 @@ void init_net_pages(unsigned long order_
   10.13          {
   10.14                  np = net_page_table + i;
   10.15                  np->virt_addr = (unsigned long)net_page_chunk + (i * PAGE_SIZE);
   10.16 -                
   10.17 +
   10.18                  // now fill the pte pointer:
   10.19                  np->ppte = 0xdeadbeef;
   10.20                  pgd = pgd_offset_k(np->virt_addr);
   10.21 -                if (!pgd_none(*pgd))
   10.22 -                {
   10.23 -                    pmd = pmd_offset(pgd, np->virt_addr);
   10.24 -                    if (!pmd_none(*pmd))
   10.25 -                    {
   10.26 -                            ptep = pte_offset(pmd, np->virt_addr);
   10.27 -                            np->ppte = (unsigned long)ptep; // neet to virt_to_phys this?
   10.28 -                    }
   10.29 -                }
    10.30 +                if (pgd_none(*pgd) || pgd_bad(*pgd)) BUG();
    10.31 +                pmd = pmd_offset(pgd, np->virt_addr);
    10.32 +                if (pmd_none(*pmd)) BUG();
    10.33 +                if (pmd_bad(*pmd)) BUG();
   10.34 +
   10.35 +                ptep = pte_offset(pmd, np->virt_addr);
   10.36 +                np->ppte = (unsigned long)virt_to_mach(ptep);
   10.37  
   10.38                  list_add_tail(&np->list, &net_page_list);
   10.39          }
   10.40 @@ -297,10 +295,11 @@ void free_net_page(struct net_page_info 
   10.41      
   10.42      spin_lock_irqsave(&net_page_list_lock, flags);
   10.43      
   10.44 -    list_add_tail(&np->list, &net_page_list);
   10.45 +    list_add(&np->list, &net_page_list);
   10.46      net_pages++;
   10.47  
   10.48      spin_unlock_irqrestore(&net_page_list_lock, flags);
   10.49 +
   10.50  }
   10.51  
   10.52  struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
   10.53 @@ -427,12 +426,14 @@ static void skb_clone_fraglist(struct sk
   10.54  
   10.55  static void skb_release_data(struct sk_buff *skb)
   10.56  {
   10.57 -	if (!skb->cloned ||
   10.58 +        if (!skb->cloned ||
   10.59  	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
   10.60  		if (skb_shinfo(skb)->nr_frags) {
   10.61  			int i;
   10.62 -			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
   10.63 +			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 
   10.64 +{
   10.65  				put_page(skb_shinfo(skb)->frags[i].page);
   10.66 +}
   10.67  		}
   10.68  
   10.69  		if (skb_shinfo(skb)->frag_list)
   10.70 @@ -445,6 +446,7 @@ static void skb_release_data(struct sk_b
   10.71                      free_net_page(skb->net_page);
   10.72                  }
   10.73  	}
   10.74 +
   10.75  }
   10.76  
   10.77  /*