ia64/xen-unstable

changeset 97:ce656d157bbf

bitkeeper revision 1.15.1.9 (3e2fd418NdZYqL3KPr6URqd77DwnJQ)

Add shadow ring in the RX direction to protect buffers from guest
tampering once they have been passed to the hypervisor. This
is under the umbrella of eventual zero-copy network code.
author akw27@plucky.localdomain
date Thu Jan 23 11:38:00 2003 +0000 (2003-01-23)
parents f7ff141acc2a
children 540e4f4c8e38
files BitKeeper/etc/logging_ok xen-2.4.16/common/network.c xen-2.4.16/include/hypervisor-ifs/network.h xen-2.4.16/include/xeno/vif.h xen-2.4.16/net/dev.c
line diff
     1.1 --- a/BitKeeper/etc/logging_ok	Tue Jan 21 14:38:45 2003 +0000
     1.2 +++ b/BitKeeper/etc/logging_ok	Thu Jan 23 11:38:00 2003 +0000
     1.3 @@ -1,5 +1,6 @@
     1.4  akw27@boulderdash.cl.cam.ac.uk
     1.5  akw27@labyrinth.cl.cam.ac.uk
     1.6 +akw27@plucky.localdomain
     1.7  iap10@labyrinth.cl.cam.ac.uk
     1.8  kaf24@labyrinth.cl.cam.ac.uk
     1.9  kaf24@plym.cl.cam.ac.uk
     2.1 --- a/xen-2.4.16/common/network.c	Tue Jan 21 14:38:45 2003 +0000
     2.2 +++ b/xen-2.4.16/common/network.c	Thu Jan 23 11:38:00 2003 +0000
     2.3 @@ -49,6 +49,7 @@ net_vif_t *create_net_vif(int domain)
     2.4  {
     2.5      net_vif_t *new_vif;
     2.6      net_ring_t *new_ring;
     2.7 +    net_shadow_ring_t *shadow_ring;
     2.8      struct task_struct *dom_task;
     2.9      
    2.10      if ( !(dom_task = find_domain_by_id(domain)) ) 
    2.11 @@ -64,7 +65,27 @@ net_vif_t *create_net_vif(int domain)
    2.12      new_ring = dom_task->net_ring_base + dom_task->num_net_vifs;
    2.13      memset(new_ring, 0, sizeof(net_ring_t));
    2.14  
    2.15 +    // allocate the shadow ring.  
    2.16 +    // maybe these should be kmem_cache instead of kmalloc?
    2.17 +    
    2.18 +    shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL);
    2.19 +    if (shadow_ring == NULL) goto fail;
    2.20 +    
    2.21 +    shadow_ring->tx_ring = kmalloc(TX_RING_SIZE 
    2.22 +                    * sizeof(tx_shadow_entry_t), GFP_KERNEL);
    2.23 +    shadow_ring->rx_ring = kmalloc(RX_RING_SIZE
    2.24 +                    * sizeof(rx_shadow_entry_t), GFP_KERNEL);
    2.25 +    if ((shadow_ring->tx_ring == NULL) || (shadow_ring->rx_ring == NULL))
    2.26 +            goto fail;
    2.27 +
    2.28 +    shadow_ring->rx_prod = 0;
    2.29 +    
    2.30 +    // fill in the new vif struct.
    2.31 +    
    2.32      new_vif->net_ring = new_ring;
    2.33 +    new_vif->shadow_ring = shadow_ring;
    2.34 +    
    2.35 +                    
    2.36      skb_queue_head_init(&new_vif->skb_list);
    2.37      new_vif->domain = domain;
    2.38      
    2.39 @@ -77,6 +98,10 @@ net_vif_t *create_net_vif(int domain)
    2.40      dom_task->num_net_vifs++;
    2.41      
    2.42      return new_vif;
    2.43 +    
    2.44 +fail:
    2.45 +    printk("VIF allocation failed!\n");
    2.46 +    return NULL;
    2.47  }
    2.48  
    2.49  /* delete_net_vif - Delete the last vif in the given domain. 
    2.50 @@ -101,7 +126,10 @@ void destroy_net_vif(struct task_struct 
    2.51      write_lock(&sys_vif_lock);
    2.52      sys_vif_list[p->net_vif_list[i]->id] = NULL; // system vif list not gc'ed
    2.53      write_unlock(&sys_vif_lock);        
    2.54 -    
    2.55 +   
    2.56 +    kfree(p->net_vif_list[i]->shadow_ring->tx_ring);
    2.57 +    kfree(p->net_vif_list[i]->shadow_ring->rx_ring);
    2.58 +    kfree(p->net_vif_list[i]->shadow_ring);
    2.59      kmem_cache_free(net_vif_cache, p->net_vif_list[i]);
    2.60  }
    2.61  
     3.1 --- a/xen-2.4.16/include/hypervisor-ifs/network.h	Tue Jan 21 14:38:45 2003 +0000
     3.2 +++ b/xen-2.4.16/include/hypervisor-ifs/network.h	Thu Jan 23 11:38:00 2003 +0000
     3.3 @@ -15,13 +15,15 @@
     3.4  #include <linux/types.h>
     3.5  
     3.6  typedef struct tx_entry_st {
     3.7 -	unsigned long addr; /* virtual address */
     3.8 -	unsigned long size; /* in bytes */
     3.9 +	unsigned long addr;   /* virtual address */
    3.10 +	unsigned long size;   /* in bytes */
    3.11 +        int           status; /* per descriptor status. */
    3.12  } tx_entry_t;
    3.13  
    3.14  typedef struct rx_entry_st {
    3.15 -	unsigned long addr; /* virtual address */
    3.16 -	unsigned long size; /* in bytes */
    3.17 +	unsigned long addr;   /* virtual address */
    3.18 +	unsigned long size;   /* in bytes */
    3.19 +        int           status; /* per descriptor status. */
    3.20  } rx_entry_t;
    3.21  
    3.22  #define TX_RING_SIZE 1024
    3.23 @@ -111,4 +113,11 @@ typedef struct net_rule_ent_st
    3.24  /* Drop a new rule down to the network tables. */
    3.25  int add_net_rule(net_rule_t *rule);
    3.26  
    3.27 +
    3.28 +/* Descriptor status values:
    3.29 + */
    3.30 +
    3.31 +#define RING_STATUS_OK               0  // Everything is gravy.
    3.32 +#define RING_STATUS_ERR_CFU         -1  // Copy from user problems.
    3.33 +
    3.34  #endif
     4.1 --- a/xen-2.4.16/include/xeno/vif.h	Tue Jan 21 14:38:45 2003 +0000
     4.2 +++ b/xen-2.4.16/include/xeno/vif.h	Thu Jan 23 11:38:00 2003 +0000
     4.3 @@ -18,9 +18,37 @@
     4.4  #include <hypervisor-ifs/network.h>
     4.5  #include <xeno/skbuff.h>
     4.6  
     4.7 +/* 
     4.8 + * shadow ring structures are used to protect the descriptors from
     4.9 + * tampering after they have been passed to the hypervisor.
    4.10 + *
    4.11 + * TX_RING_SIZE and RX_RING_SIZE are defined in the shared network.h.
    4.12 + */
    4.13 +
    4.14 +typedef struct tx_shadow_entry_st {
    4.15 +    unsigned long addr;
    4.16 +    unsigned long size;
    4.17 +    int           status;
    4.18 +    unsigned long flush_count;
    4.19 +} tx_shadow_entry_t;
    4.20 +
    4.21 +typedef struct rx_shadow_entry_st {
    4.22 +    unsigned long addr;
    4.23 +    unsigned long size;
    4.24 +    int           status;
    4.25 +    unsigned long flush_count;
    4.26 +} rx_shadow_entry_t;
    4.27 +
    4.28 +typedef struct net_shadow_ring_st {
    4.29 +    tx_shadow_entry_t *tx_ring;
    4.30 +    rx_shadow_entry_t *rx_ring;
    4.31 +    unsigned int rx_prod;  // trying to add shadow pointers only as I need to.
    4.32 +} net_shadow_ring_t;
    4.33 +
    4.34  typedef struct net_vif_st {
    4.35 -    net_ring_t  *net_ring;
    4.36 -    int          id;
    4.37 +    net_ring_t          *net_ring;
    4.38 +    net_shadow_ring_t   *shadow_ring;
    4.39 +    int                 id;
    4.40      struct sk_buff_head skb_list;
    4.41      unsigned int domain;
    4.42      // rules table goes here in next revision.
    4.43 @@ -40,3 +68,8 @@ void destroy_net_vif(struct task_struct 
    4.44  void add_default_net_rule(int vif_id, u32 ipaddr);
    4.45  int net_get_target_vif(struct sk_buff *skb);
    4.46  void add_default_net_rule(int vif_id, u32 ipaddr);
    4.47 +
    4.48 +/* status fields per-descriptor:
    4.49 + */
    4.50 +
    4.51 +
     5.1 --- a/xen-2.4.16/net/dev.c	Tue Jan 21 14:38:45 2003 +0000
     5.2 +++ b/xen-2.4.16/net/dev.c	Thu Jan 23 11:38:00 2003 +0000
     5.3 @@ -879,9 +879,10 @@ void flush_rx_queue(void)
     5.4      struct sk_buff *skb;
     5.5      shared_info_t *s = current->shared_info;
     5.6      net_ring_t *net_ring;
     5.7 +    net_shadow_ring_t *shadow_ring;
     5.8      unsigned int i, nvif;
     5.9 -    rx_entry_t rx;
    5.10 -
    5.11 +    rx_shadow_entry_t *rx;
    5.12 +    
    5.13      /* I have changed this to batch flush all vifs for a guest
    5.14       * at once, whenever this is called.  Since the guest is about to be
    5.15       * scheduled and issued an RX interrupt for one nic, it might as well
    5.16 @@ -899,6 +900,7 @@ void flush_rx_queue(void)
    5.17      for (nvif = 0; nvif < current->num_net_vifs; nvif++)
    5.18      {
    5.19          net_ring = current->net_vif_list[nvif]->net_ring;
    5.20 +        shadow_ring = current->net_vif_list[nvif]->shadow_ring;
    5.21          while ( (skb = skb_dequeue(&current->net_vif_list[nvif]->skb_list)) 
    5.22                          != NULL )
    5.23          {
    5.24 @@ -923,12 +925,13 @@ void flush_rx_queue(void)
    5.25              i = net_ring->rx_cons;
    5.26              if ( i != net_ring->rx_prod )
    5.27              {
    5.28 -                if ( !copy_from_user(&rx, net_ring->rx_ring+i, sizeof(rx)) )
    5.29 +                if ( shadow_ring->rx_ring[i].status == RING_STATUS_OK)
    5.30                  {
    5.31 -                    if ( (skb->len + ETH_HLEN) < rx.size )
    5.32 -                        rx.size = skb->len + ETH_HLEN;
    5.33 -                    copy_to_user((void *)rx.addr, skb->mac.raw, rx.size);
    5.34 -                    copy_to_user(net_ring->rx_ring+i, &rx, sizeof(rx));
    5.35 +                    rx = shadow_ring->rx_ring+i;
    5.36 +                    if ( (skb->len + ETH_HLEN) < rx->size )
    5.37 +                        rx->size = skb->len + ETH_HLEN;
    5.38 +                    copy_to_user((void *)rx->addr, skb->mac.raw, rx->size);
    5.39 +                    copy_to_user(net_ring->rx_ring+i, rx, sizeof(rx));
    5.40                  }
    5.41                  net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1);
    5.42                  if ( net_ring->rx_cons == net_ring->rx_event )
    5.43 @@ -1923,7 +1926,8 @@ int __init net_dev_init(void)
    5.44  long do_net_update(void)
    5.45  {
    5.46      shared_info_t *shared = current->shared_info;    
    5.47 -    net_ring_t *net_ring = current->net_ring_base;
    5.48 +    net_ring_t *net_ring;
    5.49 +    net_shadow_ring_t *shadow_ring;
    5.50      net_vif_t *current_vif;
    5.51      unsigned int i, j;
    5.52      struct sk_buff *skb;
    5.53 @@ -1933,6 +1937,10 @@ long do_net_update(void)
    5.54      {
    5.55          current_vif = current->net_vif_list[j];
    5.56          net_ring = current_vif->net_ring;
    5.57 +
    5.58 +        /* First, we send out pending TX descriptors if they exist on this ring.
    5.59 +         */
    5.60 +        
    5.61          for ( i = net_ring->tx_cons; i != net_ring->tx_prod; i = TX_RING_INC(i) )
    5.62          {
    5.63              if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) )
    5.64 @@ -1997,6 +2005,32 @@ long do_net_update(void)
    5.65              }
    5.66          }
    5.67          net_ring->tx_cons = i;
    5.68 +
    5.69 +        /* Next, pull any new RX descriptors across to the shadow ring.
    5.70 +         * Note that in the next revision, these will reference PTEs and the
    5.71 +         * code here will have to validate reference and flush counts, copy the 
    5.72 +         * descriptor, change the ownership to dom0 and invalidate the client's
    5.73 +         * version of the page.
    5.74 +         */
    5.75 +    
    5.76 +        shadow_ring = current_vif->shadow_ring;
    5.77 +
    5.78 +        for (i = shadow_ring->rx_prod; i != net_ring->rx_prod; i = TX_RING_INC(i))
    5.79 +        {
    5.80 +            /* This copy assumes that rx_shadow_entry_t is an extension of rx_entry_t:
    5.81 +             * any extra fields must be tacked on to the end.
    5.82 +             */
    5.83 +            
    5.84 +            if ( copy_from_user( shadow_ring->rx_ring+i, net_ring->rx_ring+i, 
    5.85 +                                 sizeof (rx_entry_t) ) )
    5.86 +            {
    5.87 +                shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU;
    5.88 +            } else {
    5.89 +                shadow_ring->rx_ring[i].status = RING_STATUS_OK;
    5.90 +            }
    5.91 +        }
    5.92 +
    5.93 +        shadow_ring->rx_prod = net_ring->rx_prod;
    5.94      }
    5.95  
    5.96      return 0;