direct-io.hg

changeset 368:ff1bb22c81a1

bitkeeper revision 1.172.1.1 (3e9d96adF7Xs5Gzerfc8T2csCpdBhQ)

Many files:
We can now release resources (e.g. memory) when a domain is destroyed. This is because we refcount network packets and suchlike (see the refcounting sketch below).
author kaf24@scramble.cl.cam.ac.uk
date Wed Apr 16 17:45:17 2003 +0000 (2003-04-16)
parents 87768b015863
children 95c43a4b6685
files xen/common/dom0_ops.c xen/common/domain.c xen/common/network.c xen/common/schedule.c xen/drivers/block/xen_block.c xen/include/asm-i386/processor.h xen/include/xeno/blkdev.h xen/include/xeno/list.h xen/include/xeno/sched.h xen/include/xeno/vif.h xen/net/dev.c xen/net/skbuff.c
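
The heart of this changeset is a get/put refcounting discipline: an object is created with refcnt 1, every list or queue that holds it takes a reference, and the final put triggers the destructor. Below is a minimal, compilable sketch of that pattern, modelled on the get_vif()/put_vif() macros added to xen/include/xeno/vif.h in the diff; the atomic helpers and the cut-down net_vif_t are illustrative stand-ins for a userspace build, not the real Xen definitions.

    /* Sketch only: userspace stand-ins for Xen's atomic_t operations. */
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { volatile int counter; } atomic_t;
    #define atomic_set(v, i)        ((v)->counter = (i))
    #define atomic_inc(v)           __sync_fetch_and_add(&(v)->counter, 1)
    #define atomic_dec_and_test(v)  (__sync_sub_and_fetch(&(v)->counter, 1) == 0)

    /* Cut-down vif: the real one carries rings, lists, and a domain pointer. */
    typedef struct net_vif_st {
        atomic_t refcnt;
        int      id;
    } net_vif_t;

    static void destroy_net_vif(net_vif_t *vif)
    {
        /* The real destructor also returns outstanding rx buffers to the
         * guest and drops the reference the vif holds on its task_struct. */
        printf("vif %d destroyed\n", vif->id);
        free(vif);
    }

    /* Same shape as the get_vif()/put_vif() macros in the vif.h hunk below. */
    #define get_vif(_v) (atomic_inc(&(_v)->refcnt))
    #define put_vif(_v)                                                \
    do {                                                               \
        if ( atomic_dec_and_test(&(_v)->refcnt) ) destroy_net_vif(_v); \
    } while (0)

    int main(void)
    {
        net_vif_t *vif = malloc(sizeof(*vif));
        atomic_set(&vif->refcnt, 1);  /* creation holds the first reference */
        vif->id = 0;

        get_vif(vif);   /* e.g. placed on the net schedule list */
        put_vif(vif);   /* taken back off the list              */
        put_vif(vif);   /* unlink drops the last ref: destroy   */
        return 0;
    }

The same discipline shows up below for task structs: free_task_struct() now routes the final release through release_task(), and the blkdev and net schedule lists each hold a task/vif reference for as long as an entry is queued.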
line diff
     1.1 --- a/xen/common/dom0_ops.c	Wed Apr 16 12:44:16 2003 +0000
     1.2 +++ b/xen/common/dom0_ops.c	Wed Apr 16 17:45:17 2003 +0000
     1.3 @@ -18,21 +18,8 @@ extern unsigned int alloc_new_dom_mem(st
     1.4  
     1.5  static unsigned int get_domnr(void)
     1.6  {
     1.7 -#if 1
     1.8      static unsigned int domnr = 0;
     1.9      return ++domnr;
    1.10 -#else
    1.11 -    struct task_struct *p = &idle0_task;
    1.12 -    unsigned long dom_mask = 0;
    1.13 -    read_lock_irq(&tasklist_lock);
    1.14 -    do {
    1.15 -        if ( is_idle_task(p) ) continue;
    1.16 -        set_bit(p->domain, &dom_mask); 
    1.17 -    }
    1.18 -    while ( (p = p->next_task) != &idle0_task );   
    1.19 -    read_unlock_irq(&tasklist_lock);
    1.20 -    return (dom_mask == ~0UL) ? 0 : ffz(dom_mask);
    1.21 -#endif
    1.22  }
    1.23  
    1.24  static void build_page_list(struct task_struct *p)
     2.1 --- a/xen/common/domain.c	Wed Apr 16 12:44:16 2003 +0000
     2.2 +++ b/xen/common/domain.c	Wed Apr 16 17:45:17 2003 +0000
     2.3 @@ -66,6 +66,8 @@ struct task_struct *do_newdomain(unsigne
     2.4  
     2.5      sched_add_domain(p);
     2.6  
     2.7 +    INIT_LIST_HEAD(&p->net_vifs);
     2.8 +
     2.9      p->net_ring_base = (net_ring_t *)(p->shared_info + 1);
    2.10      INIT_LIST_HEAD(&p->pg_head);
    2.11      p->max_pages = p->tot_pages = 0;
    2.12 @@ -111,6 +113,9 @@ void kill_domain_with_errmsg(const char 
    2.13  /* Kill the currently executing domain. */
    2.14  void kill_domain(void)
    2.15  {
    2.16 +    struct list_head *ent;
    2.17 +    net_vif_t *vif;
    2.18 +
    2.19      if ( current->domain == 0 )
    2.20      {
    2.21          extern void machine_restart(char *);
    2.22 @@ -119,8 +124,17 @@ void kill_domain(void)
    2.23      }
    2.24  
    2.25      printk("Killing domain %d\n", current->domain);
    2.26 +
    2.27 +    sched_rem_domain(current);
    2.28 +
    2.29 +    unlink_blkdev_info(current);
    2.30 +
    2.31 +    while ( (ent = current->net_vifs.next) != &current->net_vifs )
    2.32 +    {
    2.33 +        vif = list_entry(ent, net_vif_t, dom_list);
    2.34 +        unlink_net_vif(vif);
    2.35 +    }    
    2.36      
    2.37 -    sched_rem_domain(current);
    2.38      schedule();
    2.39      BUG(); /* never get here */
    2.40  }
    2.41 @@ -182,13 +196,14 @@ unsigned int alloc_new_dom_mem(struct ta
    2.42          list_del(&pf->list);
    2.43          list_add_tail(&pf->list, &p->pg_head);
    2.44          free_pfns--;
    2.45 +        ASSERT(free_pfns != 0);
    2.46      }
    2.47     
    2.48      spin_unlock_irqrestore(&free_list_lock, flags);
    2.49      
    2.50      p->tot_pages = req_pages;
    2.51  
    2.52 -    // temporary, max_pages should be explicitly specified
    2.53 +    /* TEMPORARY: max_pages should be explicitly specified. */
    2.54      p->max_pages = p->tot_pages;
    2.55  
    2.56      return 0;
    2.57 @@ -197,15 +212,21 @@ unsigned int alloc_new_dom_mem(struct ta
    2.58  
    2.59  void free_all_dom_mem(struct task_struct *p)
    2.60  {
    2.61 -    struct list_head *list_ent, *tmp;
    2.62 +    struct list_head *ent;
    2.63 +    unsigned long flags;
    2.64  
    2.65 -    list_for_each_safe(list_ent, tmp, &p->pg_head)
    2.66 +    spin_lock_irqsave(&free_list_lock, flags);
    2.67 +    while ( (ent = p->pg_head.next) != &p->pg_head )
    2.68      {
    2.69 -        struct pfn_info *pf = list_entry(list_ent, struct pfn_info, list);
    2.70 +        struct pfn_info *pf = list_entry(ent, struct pfn_info, list);
    2.71          pf->type_count = pf->tot_count = pf->flags = 0;
    2.72 -        list_del(list_ent);
    2.73 -        list_add(list_ent, &free_list);
    2.74 +        ASSERT(ent->next->prev == ent);
    2.75 +        ASSERT(ent->prev->next == ent);
    2.76 +        list_del(ent);
    2.77 +        list_add(ent, &free_list);
    2.78 +        free_pfns++;
    2.79      }
    2.80 +    spin_unlock_irqrestore(&free_list_lock, flags);
    2.81  
    2.82      p->tot_pages = 0;
    2.83  }
    2.84 @@ -216,36 +237,25 @@ void release_task(struct task_struct *p)
    2.85  {
    2.86      ASSERT(p->state == TASK_DYING);
    2.87      ASSERT(!p->has_cpu);
    2.88 +
    2.89 +    printk("Releasing task %d\n", p->domain);
    2.90 +
    2.91      write_lock_irq(&tasklist_lock);
    2.92      REMOVE_LINKS(p);
    2.93      write_unlock_irq(&tasklist_lock);
    2.94  
    2.95 -    /* XXX SMH: so below is screwed currently; need ref counting on vifs,
    2.96 -       vhds, etc and proper clean up. Until then just blow the memory :-( */
    2.97 -#if 0
    2.98      /*
    2.99 -     * Safe! Only queue skbuffs with tasklist_lock held.
   2.100 -     * Only access shared_info with tasklist_lock held.
   2.101 -     * And free_task_struct() only releases if refcnt == 0.
   2.102 +     * This frees up blkdev rings. Totally safe since blkdev ref counting
   2.103 +     * actually uses the task_struct refcnt.
   2.104       */
   2.105 -    while ( p->num_net_vifs )
   2.106 -    {
   2.107 -        destroy_net_vif(p);
   2.108 -    }
   2.109 -    
   2.110 -    free_page((unsigned long)p->mm.perdomain_pt);
   2.111 -
   2.112      destroy_blkdev_info(p);
   2.113  
   2.114 +    /* Free all memory associated with this domain. */
   2.115 +    free_page((unsigned long)p->mm.perdomain_pt);
   2.116      UNSHARE_PFN(virt_to_page(p->shared_info));
   2.117      free_page((unsigned long)p->shared_info);
   2.118 -
   2.119      free_all_dom_mem(p);
   2.120 -
   2.121 -    free_task_struct(p);
   2.122 -#else 
   2.123 -    printk("XEN::release_task: not freeing memory etc yet XXX FIXME.\n"); 
   2.124 -#endif
   2.125 +    free_pages((unsigned long)p, 1);
   2.126  }
   2.127  
   2.128  
     3.1 --- a/xen/common/network.c	Wed Apr 16 12:44:16 2003 +0000
     3.2 +++ b/xen/common/network.c	Wed Apr 16 17:45:17 2003 +0000
     3.3 @@ -32,7 +32,8 @@ net_rule_ent_t *net_rule_list;          
     3.4  kmem_cache_t *net_vif_cache;                        
     3.5  kmem_cache_t *net_rule_cache;
     3.6  static rwlock_t net_rule_lock = RW_LOCK_UNLOCKED;   /* rule mutex */
     3.7 -static rwlock_t sys_vif_lock = RW_LOCK_UNLOCKED;    /* vif mutex */
     3.8 +
     3.9 +rwlock_t sys_vif_lock = RW_LOCK_UNLOCKED;    /* vif mutex */
    3.10  
    3.11  void print_net_rule_list();
    3.12  
    3.13 @@ -53,7 +54,8 @@ net_vif_t *create_net_vif(int domain)
    3.14      net_ring_t *new_ring;
    3.15      net_shadow_ring_t *shadow_ring;
    3.16      struct task_struct *dom_task;
    3.17 -    
    3.18 +    unsigned long flags;
    3.19 +
    3.20      if ( !(dom_task = find_domain_by_id(domain)) )
    3.21          return NULL;
    3.22      
    3.23 @@ -76,24 +78,24 @@ net_vif_t *create_net_vif(int domain)
    3.24      shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0;
    3.25      shadow_ring->tx_prod = shadow_ring->tx_cons = shadow_ring->tx_idx = 0;
    3.26      
    3.27 -    /* Fill in the new vif struct. */
    3.28 -    
    3.29 -    new_vif->net_ring = new_ring;
    3.30 +    /*
    3.31 +     * Fill in the new vif struct. Note that, while the vif's refcnt is
    3.32 +     * non-zero, we hold a reference to the task structure.
    3.33 +     */
    3.34 +    atomic_set(&new_vif->refcnt, 1);
    3.35 +    new_vif->net_ring    = new_ring;
    3.36      new_vif->shadow_ring = shadow_ring;
    3.37 -    
    3.38 -    new_vif->domain = dom_task;
    3.39 +    new_vif->domain      = dom_task;
    3.40 +    new_vif->list.next   = NULL;
    3.41  
    3.42 -    new_vif->list.next = NULL;
    3.43 -    
    3.44 -    write_lock(&sys_vif_lock);
    3.45 +    list_add(&new_vif->dom_list, &dom_task->net_vifs);
    3.46 +    dom_task->num_net_vifs++;
    3.47 +
    3.48 +    write_lock_irqsave(&sys_vif_lock, flags);
    3.49      new_vif->id = sys_vif_count;
    3.50      sys_vif_list[sys_vif_count++] = new_vif;
    3.51 -    write_unlock(&sys_vif_lock);
    3.52 -
    3.53 -    dom_task->net_vif_list[dom_task->num_net_vifs] = new_vif;
    3.54 -    dom_task->num_net_vifs++;
    3.55 +    write_unlock_irqrestore(&sys_vif_lock, flags);
    3.56      
    3.57 -    free_task_struct(dom_task);
    3.58      return new_vif;
    3.59      
    3.60  fail:
    3.61 @@ -109,71 +111,81 @@ fail:
    3.62      return NULL;
    3.63  }
    3.64  
    3.65 -/* delete_net_vif - Delete the last vif in the given domain. 
    3.66 - *
    3.67 - * There doesn't seem to be any reason (yet) to be able to axe an arbitrary 
    3.68 - * vif, by vif id. 
    3.69 - */
    3.70 -
    3.71 -void destroy_net_vif(struct task_struct *p)
    3.72 +void destroy_net_vif(net_vif_t *vif)
    3.73  {
    3.74      int i;
    3.75 +    unsigned long *pte, flags;
    3.76 +    struct pfn_info *page;
    3.77 +    struct task_struct *p = vif->domain;
    3.78  
    3.79 -    if ( p->num_net_vifs <= 0 ) return; // nothing to do.
    3.80 -    
    3.81 -    i = --p->num_net_vifs;
    3.82 -    
    3.83 -    write_lock(&sys_vif_lock);
    3.84 -    sys_vif_list[p->net_vif_list[i]->id] = NULL; // system vif list not gc'ed
    3.85 -    write_unlock(&sys_vif_lock);        
    3.86 -   
    3.87 -    kfree(p->net_vif_list[i]->shadow_ring->tx_ring);
    3.88 -    kfree(p->net_vif_list[i]->shadow_ring->rx_ring);
    3.89 -    kfree(p->net_vif_list[i]->shadow_ring);
    3.90 -    kmem_cache_free(net_vif_cache, p->net_vif_list[i]);
    3.91 +    /* Return any outstanding receive buffers to the guest OS. */
    3.92 +    spin_lock_irqsave(&p->page_lock, flags);
    3.93 +    for ( i  = vif->shadow_ring->rx_idx; 
    3.94 +          i != vif->shadow_ring->rx_prod; 
    3.95 +          i  = ((i+1) & (RX_RING_SIZE-1)) )
    3.96 +    {
    3.97 +        rx_shadow_entry_t *rx = vif->shadow_ring->rx_ring + i;
    3.98 +        if ( rx->status != RING_STATUS_OK ) continue;
    3.99 +        pte  = map_domain_mem(rx->addr);
   3.100 +        *pte |= _PAGE_PRESENT;
   3.101 +        page = frame_table + (*pte >> PAGE_SHIFT);
   3.102 +        page->flags &= ~PG_type_mask;
   3.103 +        if ( (*pte & _PAGE_RW) ) 
   3.104 +            page->flags |= PGT_writeable_page | PG_need_flush;
   3.105 +        unmap_domain_mem(pte);
   3.106 +    }
   3.107 +    spin_unlock_irqrestore(&p->page_lock, flags);
   3.108 +
   3.109 +    kfree(vif->shadow_ring->tx_ring);
   3.110 +    kfree(vif->shadow_ring->rx_ring);
   3.111 +    kfree(vif->shadow_ring);
   3.112 +    kmem_cache_free(net_vif_cache, vif);
   3.113 +    free_task_struct(p);
   3.114  }
   3.115  
   3.116 +void unlink_net_vif(net_vif_t *vif)
   3.117 +{
   3.118 +    unsigned long flags;
   3.119 +    list_del(&vif->dom_list);
   3.120 +    vif->domain->num_net_vifs--;
   3.121 +    write_lock_irqsave(&sys_vif_lock, flags);
   3.122 +    sys_vif_list[vif->id] = NULL;
   3.123 +    write_unlock_irqrestore(&sys_vif_lock, flags);
   3.124 +    put_vif(vif);
   3.125 +}
   3.126 +
   3.127 +
   3.128  /* vif_query - Call from the proc file system to get a list of vifs 
   3.129   * assigned to a particular domain.
   3.130   */
   3.131  
   3.132  void vif_query(vif_query_t *vq)
   3.133  {
   3.134 -    struct task_struct *dom_task;
   3.135 +    net_vif_t *vif;
   3.136 +    struct task_struct *p;
   3.137 +    unsigned long flags;
   3.138      char buf[128];
   3.139      int i;
   3.140  
   3.141 -    if ( !(dom_task = find_domain_by_id(vq->domain)) ) return;
   3.142 +    if ( !(p = find_domain_by_id(vq->domain)) ) 
   3.143 +        return;
   3.144  
   3.145      *buf = '\0';
   3.146  
   3.147 -    for ( i = 0; i < dom_task->num_net_vifs; i++ )
   3.148 -        sprintf(buf + strlen(buf), "%d\n", dom_task->net_vif_list[i]->id);
   3.149 +    read_lock_irqsave(&sys_vif_lock, flags);
   3.150 +    for ( i = 0; i < MAX_SYSTEM_VIFS; i++ )
   3.151 +    {
   3.152 +        vif = sys_vif_list[i];
   3.153 +        if ( (vif == NULL) || (vif->domain != p) ) continue;
   3.154 +        sprintf(buf + strlen(buf), "%d\n", vif->id);
   3.155 +    }
   3.156 +    read_unlock_irqrestore(&sys_vif_lock, flags);
   3.157  
   3.158      copy_to_user(vq->buf, buf, strlen(buf) + 1);
   3.159      
   3.160 -    free_task_struct(dom_task);
   3.161 +    free_task_struct(p);
   3.162  }
   3.163          
   3.164 -
   3.165 -/* print_vif_list - Print the contents of the global vif table.
   3.166 - */
   3.167 -
   3.168 -void print_vif_list()
   3.169 -{
   3.170 -    int i;
   3.171 -    net_vif_t *v;
   3.172 -
   3.173 -    printk("Currently, there are %d VIFs.\n", sys_vif_count);
   3.174 -    for ( i = 0; i<sys_vif_count; i++ )
   3.175 -    {
   3.176 -        v = sys_vif_list[i];
   3.177 -        printk("] VIF Entry %d(%d):\n", i, v->id);
   3.178 -        printk("   > net_ring*:  %p\n", v->net_ring);
   3.179 -        printk("   > domain   :  %u\n", v->domain->domain);
   3.180 -    }
   3.181 -}
   3.182 -
   3.183  /* ----[ Net Rule Functions ]-----------------------------------------------*/
   3.184  
   3.185  /* add_net_rule - Add a new network filter rule.
     4.1 --- a/xen/common/schedule.c	Wed Apr 16 12:44:16 2003 +0000
     4.2 +++ b/xen/common/schedule.c	Wed Apr 16 17:45:17 2003 +0000
     4.3 @@ -440,7 +440,8 @@ asmlinkage void schedule(void)
     4.4      prev = schedule_data[this_cpu].prev;
     4.5      
     4.6      prev->policy &= ~SCHED_YIELD;
     4.7 -    if ( prev->state == TASK_DYING ) release_task(prev);
     4.8 +    if ( prev->state == TASK_DYING ) 
     4.9 +        free_task_struct(prev);
    4.10  
    4.11   same_process:
    4.12   /* update the domain's notion of time  */
     5.1 --- a/xen/drivers/block/xen_block.c	Wed Apr 16 12:44:16 2003 +0000
     5.2 +++ b/xen/drivers/block/xen_block.c	Wed Apr 16 17:45:17 2003 +0000
     5.3 @@ -128,6 +128,7 @@ static void remove_from_blkdev_list(stru
     5.4      {
     5.5          list_del(&p->blkdev_list);
     5.6          p->blkdev_list.next = NULL;
     5.7 +        free_task_struct(p);
     5.8      }
     5.9      spin_unlock_irqrestore(&io_schedule_list_lock, flags);
    5.10  }
    5.11 @@ -140,6 +141,7 @@ static void add_to_blkdev_list_tail(stru
    5.12      if ( !__on_blkdev_list(p) )
    5.13      {
    5.14          list_add_tail(&p->blkdev_list, &io_schedule_list);
    5.15 +        get_task_struct(p);
    5.16      }
    5.17      spin_unlock_irqrestore(&io_schedule_list_lock, flags);
    5.18  }
    5.19 @@ -162,9 +164,11 @@ static void io_schedule(unsigned long un
    5.20      {
    5.21          ent = io_schedule_list.next;
    5.22          p = list_entry(ent, struct task_struct, blkdev_list);
    5.23 +        get_task_struct(p);
    5.24          remove_from_blkdev_list(p);
    5.25          if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
    5.26              add_to_blkdev_list_tail(p);
    5.27 +        free_task_struct(p);
    5.28      }
    5.29  
    5.30      /* Push the batch through to disc. */
    5.31 @@ -214,6 +218,7 @@ static void end_block_io_op(struct buffe
    5.32      {
    5.33          make_response(pending_req->domain, pending_req->id,
    5.34                        pending_req->operation, pending_req->status);
    5.35 +        free_task_struct(pending_req->domain);
    5.36          spin_lock_irqsave(&pend_prod_lock, flags);
    5.37          pending_ring[pending_prod] = pending_req - pending_reqs;
    5.38          PENDREQ_IDX_INC(pending_prod);
    5.39 @@ -598,6 +603,8 @@ static void dispatch_rw_block_io(struct 
    5.40      pending_req->status    = 0;
    5.41      atomic_set(&pending_req->pendcnt, nr_psegs);
    5.42  
    5.43 +    get_task_struct(p);
    5.44 +
    5.45      /* Now we pass each segment down to the real blkdev layer. */
    5.46      for ( i = 0; i < nr_psegs; i++ )
    5.47      {
    5.48 @@ -732,14 +739,28 @@ void init_blkdev_info(struct task_struct
    5.49      xen_refresh_segment_list(p);
    5.50  }
    5.51  
    5.52 -/* End-of-day teardown for a domain. XXX Outstanding requests? */
    5.53 +/* End-of-day teardown for a domain. */
    5.54  void destroy_blkdev_info(struct task_struct *p)
    5.55  {
    5.56 -    remove_from_blkdev_list(p);
    5.57 +    ASSERT(!__on_blkdev_list(p));
    5.58      UNSHARE_PFN(virt_to_page(p->blk_ring_base));
    5.59      free_page((unsigned long)p->blk_ring_base);
    5.60  }
    5.61  
    5.62 +void unlink_blkdev_info(struct task_struct *p)
    5.63 +{
    5.64 +    unsigned long flags;
    5.65 +
    5.66 +    spin_lock_irqsave(&io_schedule_list_lock, flags);
    5.67 +    if ( __on_blkdev_list(p) )
    5.68 +    {
    5.69 +        list_del(&p->blkdev_list);
    5.70 +        p->blkdev_list.next = (void *)0xdeadbeef; /* prevent reinsertion */
    5.71 +        free_task_struct(p);
    5.72 +    }
    5.73 +    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
    5.74 +}
    5.75 +
    5.76  void initialize_block_io ()
    5.77  {
    5.78      int i;
     6.1 --- a/xen/include/asm-i386/processor.h	Wed Apr 16 12:44:16 2003 +0000
     6.2 +++ b/xen/include/asm-i386/processor.h	Wed Apr 16 17:45:17 2003 +0000
     6.3 @@ -432,7 +432,7 @@ unsigned long get_wchan(struct task_stru
     6.4  #define alloc_task_struct()  \
     6.5    ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
     6.6  #define free_task_struct(_p) \
     6.7 -  if ( atomic_dec_and_test(&(_p)->refcnt) ) free_pages((unsigned long)(_p), 1)
     6.8 +  if ( atomic_dec_and_test(&(_p)->refcnt) ) release_task(_p)
     6.9  #define get_task_struct(_p)  \
    6.10    atomic_inc(&(_p)->refcnt)
    6.11  
     7.1 --- a/xen/include/xeno/blkdev.h	Wed Apr 16 12:44:16 2003 +0000
     7.2 +++ b/xen/include/xeno/blkdev.h	Wed Apr 16 17:45:17 2003 +0000
     7.3 @@ -26,6 +26,7 @@ typedef struct {
     7.4  extern kdev_t xendev_to_physdev(unsigned short xendev);
     7.5  
     7.6  extern void init_blkdev_info(struct task_struct *);
     7.7 +extern void unlink_blkdev_info(struct task_struct *);
     7.8  extern void destroy_blkdev_info(struct task_struct *);
     7.9  
    7.10  extern int unregister_blkdev(unsigned int, const char *);
     8.1 --- a/xen/include/xeno/list.h	Wed Apr 16 12:44:16 2003 +0000
     8.2 +++ b/xen/include/xeno/list.h	Wed Apr 16 17:45:17 2003 +0000
     8.3 @@ -1,6 +1,8 @@
     8.4  #ifndef _LINUX_LIST_H
     8.5  #define _LINUX_LIST_H
     8.6  
     8.7 +#include <xeno/lib.h>
     8.8 +
     8.9  /*
    8.10   * Simple doubly linked list implementation.
    8.11   *
    8.12 @@ -87,6 +89,8 @@ static __inline__ void __list_del(struct
    8.13   */
    8.14  static __inline__ void list_del(struct list_head *entry)
    8.15  {
    8.16 +	ASSERT(entry->next->prev == entry);
    8.17 +	ASSERT(entry->prev->next == entry);
    8.18  	__list_del(entry->prev, entry->next);
    8.19  }
    8.20  
     9.1 --- a/xen/include/xeno/sched.h	Wed Apr 16 12:44:16 2003 +0000
     9.2 +++ b/xen/include/xeno/sched.h	Wed Apr 16 17:45:17 2003 +0000
     9.3 @@ -117,10 +117,9 @@ struct task_struct {
     9.4      long warped;                    /* time it ran warped last time */
     9.5      long uwarped;                   /* time it ran unwarped last time */
     9.6  
     9.7 -
     9.8      /* Network I/O */
     9.9      net_ring_t *net_ring_base;
    9.10 -    net_vif_t *net_vif_list[MAX_GUEST_VIFS];
    9.11 +    struct list_head net_vifs;
    9.12      int num_net_vifs;
    9.13  
    9.14      /* Block I/O */
    10.1 --- a/xen/include/xeno/vif.h	Wed Apr 16 12:44:16 2003 +0000
    10.2 +++ b/xen/include/xeno/vif.h	Wed Apr 16 17:45:17 2003 +0000
    10.3 @@ -71,9 +71,17 @@ typedef struct net_vif_st {
    10.4      net_shadow_ring_t  *shadow_ring;
    10.5      int                 id;
    10.6      struct task_struct *domain;
    10.7 -    struct list_head    list;
    10.8 +    struct list_head    list;     /* scheduling list */
    10.9 +    struct list_head    dom_list; /* domain list     */
   10.10 +    atomic_t            refcnt;
   10.11  } net_vif_t;
   10.12  
   10.13 +#define get_vif(_v) (atomic_inc(&(_v)->refcnt))
   10.14 +#define put_vif(_v)                                                \
   10.15 +do {                                                               \
   10.16 +    if ( atomic_dec_and_test(&(_v)->refcnt) ) destroy_net_vif(_v); \
   10.17 +} while (0)
   10.18 +
   10.19  /* VIF-related defines. */
   10.20  #define MAX_GUEST_VIFS    2 // each VIF is a small overhead in task_struct
   10.21  #define MAX_SYSTEM_VIFS 256  
   10.22 @@ -81,10 +89,12 @@ typedef struct net_vif_st {
   10.23  /* vif globals */
   10.24  extern int sys_vif_count;
   10.25  extern net_vif_t *sys_vif_list[];
   10.26 +extern rwlock_t sys_vif_lock; /* protects the sys_vif_list */
   10.27  
   10.28  /* vif prototypes */
   10.29  net_vif_t *create_net_vif(int domain);
   10.30 -void destroy_net_vif(struct task_struct *p);
   10.31 +void destroy_net_vif(net_vif_t *vif);
   10.32 +void unlink_net_vif(net_vif_t *vif);
   10.33  void add_default_net_rule(int vif_id, u32 ipaddr);
   10.34  int __net_get_target_vif(u8 *data, unsigned int len, int src_vif);
   10.35  void add_default_net_rule(int vif_id, u32 ipaddr);
    11.1 --- a/xen/net/dev.c	Wed Apr 16 12:44:16 2003 +0000
    11.2 +++ b/xen/net/dev.c	Wed Apr 16 17:45:17 2003 +0000
    11.3 @@ -499,13 +499,14 @@ void deliver_packet(struct sk_buff *skb,
    11.4      if ( (i = shadow_ring->rx_idx) == shadow_ring->rx_prod )
    11.5          return;
    11.6  
    11.7 -    if ( shadow_ring->rx_ring[i].status != RING_STATUS_OK )
    11.8 +    rx = shadow_ring->rx_ring + i;
    11.9 +
   11.10 +    if ( rx->status != RING_STATUS_OK )
   11.11      {
   11.12          DPRINTK("Bad buffer in deliver_packet()\n");
   11.13          goto inc_and_out;
   11.14      }
   11.15  
   11.16 -    rx = shadow_ring->rx_ring + i;
   11.17      ASSERT(skb->len <= PAGE_SIZE);
   11.18      rx->size   = skb->len;
   11.19      rx->offset = (unsigned char)((unsigned long)skb->data & ~PAGE_MASK);
   11.20 @@ -517,12 +518,16 @@ void deliver_packet(struct sk_buff *skb,
   11.21      g_pfn = frame_table + (*g_pte >> PAGE_SHIFT);
   11.22      h_pfn = skb->pf;
   11.23          
   11.24 -    h_pfn->tot_count = h_pfn->type_count = 1;
   11.25 -    g_pfn->tot_count = g_pfn->type_count = 0;
   11.26 +    h_pfn->tot_count = 1;
   11.27 +    g_pfn->tot_count = g_pfn->type_count = h_pfn->type_count = 0;
   11.28      h_pfn->flags = g_pfn->flags & ~PG_type_mask;
   11.29 +    g_pfn->flags = 0;
   11.30          
   11.31 -    if (*g_pte & _PAGE_RW) h_pfn->flags |= PGT_writeable_page | PG_need_flush;
   11.32 -    g_pfn->flags = 0;
   11.33 +    if ( (*g_pte & _PAGE_RW) )
   11.34 +    {
   11.35 +        h_pfn->flags |= PGT_writeable_page | PG_need_flush;
   11.36 +        h_pfn->type_count = 1;
   11.37 +    }
   11.38          
   11.39      /* Point the guest at the new machine frame. */
   11.40      machine_to_phys_mapping[h_pfn - frame_table] 
   11.41 @@ -533,6 +538,9 @@ void deliver_packet(struct sk_buff *skb,
   11.42      
   11.43      unmap_domain_mem(g_pte);
   11.44  
   11.45 +    list_del(&g_pfn->list);
   11.46 +    list_add(&h_pfn->list, &vif->domain->pg_head);
   11.47 +
   11.48      spin_unlock_irqrestore(&vif->domain->page_lock, flags);
   11.49      
   11.50      /* Our skbuff now points at the guest's old frame. */
   11.51 @@ -587,9 +595,11 @@ int netif_rx(struct sk_buff *skb)
   11.52      if ( skb->dst_vif == VIF_UNKNOWN_INTERFACE )
   11.53          skb->dst_vif = __net_get_target_vif(skb->data, skb->len, skb->src_vif);
   11.54          
   11.55 -    if ( ((vif = sys_vif_list[skb->dst_vif]) == NULL) ||
   11.56 -         (skb->dst_vif <= VIF_PHYSICAL_INTERFACE) )
   11.57 +    read_lock_irqsave(&sys_vif_lock, flags);
   11.58 +    if ( (skb->dst_vif <= VIF_PHYSICAL_INTERFACE) ||
   11.59 +         ((vif = sys_vif_list[skb->dst_vif]) == NULL) )
   11.60      {
   11.61 +        read_unlock_irqrestore(&sys_vif_lock, flags);
   11.62          netdev_rx_stat[this_cpu].dropped++;
   11.63          unmap_domain_mem(skb->head);
   11.64          kfree_skb(skb);
   11.65 @@ -597,8 +607,13 @@ int netif_rx(struct sk_buff *skb)
   11.66          return NET_RX_DROP;
   11.67      }
   11.68  
   11.69 +    get_vif(vif);
   11.70 +    read_unlock_irqrestore(&sys_vif_lock, flags);
   11.71 +
   11.72      deliver_packet(skb, vif);
   11.73      cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET_RX);
   11.74 +    put_vif(vif);
   11.75 +
   11.76      unmap_domain_mem(skb->head);
   11.77      kfree_skb(skb);
   11.78      hyp_event_notify(cpu_mask);
   11.79 @@ -636,13 +651,11 @@ static int __on_net_schedule_list(net_vi
   11.80  static void remove_from_net_schedule_list(net_vif_t *vif)
   11.81  {
   11.82      unsigned long flags;
   11.83 -    if ( !__on_net_schedule_list(vif) ) return;
   11.84      spin_lock_irqsave(&net_schedule_list_lock, flags);
   11.85 -    if ( __on_net_schedule_list(vif) )
   11.86 -    {
   11.87 -        list_del(&vif->list);
   11.88 -        vif->list.next = NULL;
   11.89 -    }
   11.90 +    ASSERT(__on_net_schedule_list(vif));
   11.91 +    list_del(&vif->list);
   11.92 +    vif->list.next = NULL;
   11.93 +    put_vif(vif);
   11.94      spin_unlock_irqrestore(&net_schedule_list_lock, flags);
   11.95  }
   11.96  
   11.97 @@ -654,6 +667,7 @@ static void add_to_net_schedule_list_tai
   11.98      if ( !__on_net_schedule_list(vif) )
   11.99      {
  11.100          list_add_tail(&vif->list, &net_schedule_list);
  11.101 +        get_vif(vif);
  11.102      }
  11.103      spin_unlock_irqrestore(&net_schedule_list_lock, flags);
  11.104  }
  11.105 @@ -723,6 +737,8 @@ static void tx_skb_release(struct sk_buf
  11.106          cpu_mask = mark_guest_event(vif->domain, _EVENT_NET_TX);
  11.107          guest_event_notify(cpu_mask);
  11.108      }
  11.109 +
  11.110 +    put_vif(vif);
  11.111  }
  11.112  
  11.113      
  11.114 @@ -741,9 +757,13 @@ static void net_tx_action(unsigned long 
  11.115          /* Get a vif from the list with work to do. */
  11.116          ent = net_schedule_list.next;
  11.117          vif = list_entry(ent, net_vif_t, list);
  11.118 +        get_vif(vif);
  11.119          remove_from_net_schedule_list(vif);
  11.120          if ( vif->shadow_ring->tx_idx == vif->shadow_ring->tx_prod )
  11.121 +        {
  11.122 +            put_vif(vif);
  11.123              continue;
  11.124 +        }
  11.125  
  11.126          /* Pick an entry from the transmit queue. */
  11.127          tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
  11.128 @@ -752,12 +772,17 @@ static void net_tx_action(unsigned long 
  11.129              add_to_net_schedule_list_tail(vif);
  11.130  
  11.131          /* Check the chosen entry is good. */
  11.132 -        if ( tx->status != RING_STATUS_OK ) continue;
  11.133 +        if ( tx->status != RING_STATUS_OK ) 
  11.134 +        {
  11.135 +            put_vif(vif);
  11.136 +            continue;
  11.137 +        }
  11.138  
  11.139          if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
  11.140          {
  11.141              printk("Out of memory in net_tx_action()!\n");
  11.142              tx->status = RING_STATUS_BAD_PAGE;
  11.143 +            put_vif(vif);
  11.144              break;
  11.145          }
  11.146          
  11.147 @@ -817,14 +842,16 @@ void update_shared_ring(void)
  11.148      shared_info_t *s = current->shared_info;
  11.149      net_ring_t *net_ring;
  11.150      net_shadow_ring_t *shadow_ring;
  11.151 -    unsigned int nvif;
  11.152 -    
  11.153 +    net_vif_t *vif;
  11.154 +    struct list_head *ent;
  11.155 +
  11.156      clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
  11.157  
  11.158 -    for ( nvif = 0; nvif < current->num_net_vifs; nvif++ )
  11.159 +    list_for_each(ent, &current->net_vifs)
  11.160      {
  11.161 -        net_ring = current->net_vif_list[nvif]->net_ring;
  11.162 -        shadow_ring = current->net_vif_list[nvif]->shadow_ring;
  11.163 +        vif = list_entry(ent, net_vif_t, dom_list);
  11.164 +        net_ring    = vif->net_ring;
  11.165 +        shadow_ring = vif->shadow_ring;
  11.166  
  11.167          /* This would mean that the guest OS has fiddled with our index. */
  11.168          if ( shadow_ring->rx_cons != net_ring->rx_cons )
  11.169 @@ -1816,25 +1843,25 @@ inline int init_tx_header(u8 *data, unsi
  11.170  
  11.171  long do_net_update(void)
  11.172  {
  11.173 +    struct list_head *ent;
  11.174      net_ring_t *net_ring;
  11.175      net_shadow_ring_t *shadow_ring;
  11.176      net_vif_t *current_vif;
  11.177 -    unsigned int i, j;
  11.178 +    unsigned int i;
  11.179      struct sk_buff *skb;
  11.180      tx_entry_t tx;
  11.181      rx_shadow_entry_t *rx;
  11.182      unsigned long pfn;
  11.183      struct pfn_info *page;
  11.184      unsigned long *g_pte;    
  11.185 +    int target;
  11.186 +    u8 *g_data;
  11.187 +    unsigned short protocol;
  11.188      
  11.189 -    for ( j = 0; j < current->num_net_vifs; j++)
  11.190 +    list_for_each(ent, &current->net_vifs)
  11.191      {
  11.192 -        int target;
  11.193 -        u8 *g_data;
  11.194 -        unsigned short protocol;
  11.195 -
  11.196 -        current_vif = current->net_vif_list[j];
  11.197 -        net_ring = current_vif->net_ring;
  11.198 +        current_vif = list_entry(ent, net_vif_t, dom_list);
  11.199 +        net_ring    = current_vif->net_ring;
  11.200          shadow_ring = current_vif->shadow_ring;
  11.201          
  11.202          /*
  11.203 @@ -1901,6 +1928,7 @@ long do_net_update(void)
  11.204                      goto tx_unmap_and_continue;
  11.205                  
  11.206                  skb->destructor = tx_skb_release;
  11.207 +                get_vif(current_vif);
  11.208  
  11.209                  shadow_ring->tx_ring[i].status = RING_STATUS_OK;
  11.210  
  11.211 @@ -1979,14 +2007,15 @@ long do_net_update(void)
  11.212              if ( (pfn >= max_page) || 
  11.213                   (page->flags != (PGT_l1_page_table | current->domain)) ) 
  11.214              {
  11.215 -                DPRINTK("Bad page frame containing ppte\n");
  11.216 +                DPRINTK("Bad page frame for ppte %d,%08lx,%08lx,%08lx\n",
  11.217 +                        current->domain, pfn, max_page, page->flags);
  11.218                  spin_unlock_irq(&current->page_lock);
  11.219                  continue;
  11.220              }
  11.221              
  11.222              g_pte = map_domain_mem(rx->addr);
  11.223              
  11.224 -            if (!(*g_pte & _PAGE_PRESENT))
  11.225 +            if ( !(*g_pte & _PAGE_PRESENT) )
  11.226              {
   11.227                 DPRINTK("Invalid PTE passed down (not present)\n");
  11.228                  goto rx_unmap_and_continue;
  11.229 @@ -1994,7 +2023,7 @@ long do_net_update(void)
  11.230              
  11.231              page = (*g_pte >> PAGE_SHIFT) + frame_table;
  11.232              
  11.233 -            if (page->tot_count != 1) 
  11.234 +            if ( page->tot_count != 1 )
  11.235              {
   11.236 		DPRINTK("RX page mapped multiple times (%d/%d/%08x)\n",
  11.237        		page->type_count, page->tot_count, page->flags);
    12.1 --- a/xen/net/skbuff.c	Wed Apr 16 12:44:16 2003 +0000
    12.2 +++ b/xen/net/skbuff.c	Wed Apr 16 17:45:17 2003 +0000
    12.3 @@ -143,7 +143,7 @@ static inline u8 *alloc_skb_data_page(st
    12.4  
    12.5      list_ptr = free_list.next;
    12.6      pf = list_entry(list_ptr, struct pfn_info, list);
    12.7 -    pf->flags = 0; /* owned by dom0 */
    12.8 +    pf->flags = 0;
    12.9      list_del(&pf->list);
   12.10      free_pfns--;
   12.11  
   12.12 @@ -162,6 +162,7 @@ static inline void dealloc_skb_data_page
   12.13  
   12.14      spin_lock_irqsave(&free_list_lock, flags);
   12.15          
   12.16 +    pf->flags = pf->type_count = pf->tot_count = 0;
   12.17      list_add(&pf->list, &free_list);
   12.18      free_pfns++;
   12.19