ia64/xen-unstable

changeset 348:e5e04893c022

bitkeeper revision 1.160.1.1 (3e8c846fQSuOz1Dd8MgUzwG5rj3bDQ)

Many files:
Free DOM0 kernel memory to the Xen allocation pool after DOM0 is created. Fix page-type handling -- we now correctly flush the TLB if a page is unpinned after a disk read and its refcnt falls to zero.
author kaf24@scramble.cl.cam.ac.uk
date Thu Apr 03 18:58:55 2003 +0000 (2003-04-03)
parents 6b300fe8e6b9
children 15ffd7ee35df
files xen/common/domain.c xen/common/kernel.c xen/common/memory.c xen/common/page_alloc.c xen/drivers/block/xen_block.c xen/include/xeno/config.h xen/include/xeno/mm.h xen/include/xeno/multiboot.h xen/include/xeno/sched.h xen/net/dev.c
line diff
     1.1 --- a/xen/common/domain.c	Tue Mar 25 14:10:16 2003 +0000
     1.2 +++ b/xen/common/domain.c	Thu Apr 03 18:58:55 2003 +0000
     1.3 @@ -14,16 +14,11 @@
     1.4  #include <asm/domain_page.h>
     1.5  #include <asm/flushtlb.h>
     1.6  #include <asm/msr.h>
     1.7 -#include <xeno/multiboot.h>
     1.8  #include <xeno/blkdev.h>
     1.9  
    1.10  #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
    1.11  #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
    1.12  
    1.13 -extern int nr_mods;
    1.14 -extern module_t *mod;
    1.15 -extern unsigned char *cmdline;
    1.16 -
    1.17  rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;
    1.18  
    1.19  /*
    1.20 @@ -173,7 +168,7 @@ unsigned int alloc_new_dom_mem(struct ta
    1.21      for ( alloc_pfns = 0; alloc_pfns < req_pages; alloc_pfns++ )
    1.22      {
    1.23          pf = list_entry(temp, struct pfn_info, list);
    1.24 -        pf->flags |= p->domain;
    1.25 +        pf->flags = p->domain;
    1.26          pf->type_count = pf->tot_count = 0;
    1.27          temp = temp->next;
    1.28          list_del(&pf->list);
    1.29 @@ -366,9 +361,9 @@ static unsigned long alloc_page_from_dom
    1.30  /* setup_guestos is used for building dom0 solely. other domains are built in
    1.31   * userspace dom0 and final setup is being done by final_setup_guestos.
    1.32   */
    1.33 -int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
    1.34 +int setup_guestos(struct task_struct *p, dom0_newdomain_t *params, 
    1.35 +                  char *data_start, unsigned long data_len, char *cmdline)
    1.36  {
    1.37 -
    1.38      struct list_head *list_ent;
    1.39      char *src, *dst;
    1.40      int i, dom = p->domain;
    1.41 @@ -387,13 +382,13 @@ int setup_guestos(struct task_struct *p,
    1.42      /* Sanity! */
    1.43      if ( p->domain != 0 ) BUG();
    1.44  
    1.45 -    if ( strncmp(__va(mod[0].mod_start), "XenoGues", 8) )
    1.46 +    if ( strncmp(data_start, "XenoGues", 8) )
    1.47      {
    1.48          printk("DOM%d: Invalid guest OS image\n", dom);
    1.49          return -1;
    1.50      }
    1.51  
    1.52 -    virt_load_address = *(unsigned long *)__va(mod[0].mod_start + 8);
    1.53 +    virt_load_address = *(unsigned long *)(data_start + 8);
    1.54      if ( (virt_load_address & (PAGE_SIZE-1)) )
    1.55      {
    1.56          printk("DOM%d: Guest OS load address not page-aligned (%08lx)\n",
    1.57 @@ -407,13 +402,12 @@ int setup_guestos(struct task_struct *p,
    1.58      alloc_address <<= PAGE_SHIFT;
    1.59      alloc_index = p->tot_pages;
    1.60  
    1.61 -    if ( (mod[nr_mods-1].mod_end-mod[0].mod_start) > 
    1.62 -         (params->memory_kb << 9) )
    1.63 +    if ( data_len > (params->memory_kb << 9) )
    1.64      {
    1.65          printk("DOM%d: Guest OS image is too large\n"
    1.66                 "       (%luMB is greater than %uMB limit for a\n"
    1.67                 "        %uMB address space)\n",
    1.68 -               dom, (mod[nr_mods-1].mod_end-mod[0].mod_start)>>20,
    1.69 +               dom, data_len>>20,
    1.70                 (params->memory_kb)>>11,
    1.71                 (params->memory_kb)>>10);
    1.72          free_all_dom_mem(p);
    1.73 @@ -539,9 +533,9 @@ int setup_guestos(struct task_struct *p,
    1.74      __write_cr3_counted(pagetable_val(p->mm.pagetable));
    1.75  
    1.76      /* Copy the guest OS image. */
    1.77 -    src = (char *)__va(mod[0].mod_start + 12);
    1.78 +    src = (char *)(data_start + 12);
    1.79      dst = (char *)virt_load_address;
    1.80 -    while ( src < (char *)__va(mod[nr_mods-1].mod_end) ) *dst++ = *src++;
    1.81 +    while ( src < (data_start+data_len) ) *dst++ = *src++;
    1.82  
    1.83      /* Set up start info area. */
    1.84      memset(virt_startinfo_address, 0, sizeof(*virt_startinfo_address));
    1.85 @@ -568,23 +562,13 @@ int setup_guestos(struct task_struct *p,
    1.86      /* Add block io interface */
    1.87      virt_startinfo_address->blk_ring = virt_to_phys(p->blk_ring_base); 
    1.88  
    1.89 -    /* We tell OS about any modules we were given. */
    1.90 -    if ( nr_mods > 1 )
    1.91 +    dst = virt_startinfo_address->cmd_line;
    1.92 +    if ( cmdline != NULL )
    1.93      {
    1.94 -        virt_startinfo_address->mod_start = 
    1.95 -            (mod[1].mod_start-mod[0].mod_start-12) + virt_load_address;
    1.96 -        virt_startinfo_address->mod_len = 
    1.97 -            mod[nr_mods-1].mod_end - mod[1].mod_start;
    1.98 -    }
    1.99 -
   1.100 -    dst = virt_startinfo_address->cmd_line;
   1.101 -    if ( mod[0].string )
   1.102 -    {
   1.103 -        char *modline = (char *)__va(mod[0].string);
   1.104          for ( i = 0; i < 255; i++ )
   1.105          {
   1.106 -            if ( modline[i] == '\0' ) break;
   1.107 -            *dst++ = modline[i];
   1.108 +            if ( cmdline[i] == '\0' ) break;
   1.109 +            *dst++ = cmdline[i];
   1.110          }
   1.111      }
   1.112      *dst = '\0';
     2.1 --- a/xen/common/kernel.c	Tue Mar 25 14:10:16 2003 +0000
     2.2 +++ b/xen/common/kernel.c	Thu Apr 03 18:58:55 2003 +0000
     2.3 @@ -1,8 +1,8 @@
     2.4  #include <stdarg.h>
     2.5  #include <xeno/lib.h>
     2.6  #include <xeno/errno.h>
     2.7 +#include <xeno/spinlock.h>
     2.8  #include <xeno/multiboot.h>
     2.9 -#include <xeno/spinlock.h>
    2.10  #include <xeno/sched.h>
    2.11  #include <xeno/mm.h>
    2.12  #include <xeno/delay.h>
    2.13 @@ -27,10 +27,6 @@ struct e820entry {
    2.14      unsigned long type;                    /* type of memory segment */
    2.15  };
    2.16  
    2.17 -/* Used by domain.c:setup_guestos */
    2.18 -int nr_mods;
    2.19 -module_t *mod;
    2.20 -
    2.21  void init_vga(void);
    2.22  void init_serial(void);
    2.23  void start_of_day(void);
    2.24 @@ -65,6 +61,7 @@ void cmain (unsigned long magic, multibo
    2.25      dom0_newdomain_t dom0_params;
    2.26      unsigned long max_page;
    2.27      unsigned char *cmdline;
    2.28 +    module_t *mod;
    2.29      int i;
    2.30  
    2.31      /*
    2.32 @@ -119,8 +116,7 @@ void cmain (unsigned long magic, multibo
    2.33      }
    2.34  #endif
    2.35  
    2.36 -    nr_mods = mbi->mods_count;
    2.37 -    mod     = (module_t *)__va(mbi->mods_addr);
    2.38 +    mod = (module_t *)__va(mbi->mods_addr);
    2.39  
    2.40      /* Parse the command line. */
    2.41      cmdline = (unsigned char *)(mbi->cmdline ? __va(mbi->cmdline) : NULL);
    2.42 @@ -174,7 +170,7 @@ void cmain (unsigned long magic, multibo
    2.43      printk("Initialised all memory on a %luMB machine\n",
    2.44             max_page >> (20-PAGE_SHIFT));
    2.45  
    2.46 -    init_page_allocator(mod[nr_mods-1].mod_end, MAX_MONITOR_ADDRESS);
    2.47 +    init_page_allocator(mod[0].mod_end, MAX_MONITOR_ADDRESS);
    2.48   
    2.49      /* These things will get done by do_newdomain() for all other tasks. */
    2.50      current->shared_info = (void *)get_free_page(GFP_KERNEL);
    2.51 @@ -195,11 +191,16 @@ void cmain (unsigned long magic, multibo
    2.52  
    2.53      new_dom = do_newdomain(0, 0);
    2.54      if ( new_dom == NULL ) panic("Error creating domain 0\n");
    2.55 -    if ( setup_guestos(new_dom, &dom0_params) != 0 )
    2.56 -    {
    2.57 -        panic("Could not set up DOM0 guest OS\n");
    2.58 -    }
    2.59 -	update_dom_time(new_dom->shared_info);
    2.60 +    if ( setup_guestos(new_dom, 
    2.61 +                       &dom0_params, 
    2.62 +                       __va(mod[0].mod_start), 
    2.63 +                       mod[0].mod_end - mod[0].mod_start, 
    2.64 +                       __va(mod[0].string))
    2.65 +         != 0 ) panic("Could not set up DOM0 guest OS\n");
    2.66 +
    2.67 +    release_bytes_to_allocator(__pa(&_end), mod[0].mod_end);
    2.68 +
    2.69 +    update_dom_time(new_dom->shared_info);
    2.70      wake_up(new_dom);
    2.71  
    2.72      cpu_idle();
     3.1 --- a/xen/common/memory.c	Tue Mar 25 14:10:16 2003 +0000
     3.2 +++ b/xen/common/memory.c	Thu Apr 03 18:58:55 2003 +0000
     3.3 @@ -275,6 +275,7 @@ static int inc_page_refcnt(unsigned long
     3.4              return -1;
     3.5          }
     3.6  
     3.7 +        page->flags &= ~PG_type_mask;
     3.8          page->flags |= type;
     3.9      }
    3.10  
    3.11 @@ -286,7 +287,6 @@ static int inc_page_refcnt(unsigned long
    3.12  static int dec_page_refcnt(unsigned long page_nr, unsigned int type)
    3.13  {
    3.14      struct pfn_info *page;
    3.15 -    int ret;
    3.16  
    3.17      if ( page_nr >= max_page )
    3.18      {
    3.19 @@ -303,9 +303,8 @@ static int dec_page_refcnt(unsigned long
    3.20          return -1;
    3.21      }
    3.22      ASSERT(page_type_count(page) != 0);
    3.23 -    if ( (ret = put_page_type(page)) == 0 ) page->flags &= ~PG_type_mask;
    3.24      put_page_tot(page);
    3.25 -    return ret;
    3.26 +    return put_page_type(page);
    3.27  }
    3.28  
    3.29  
    3.30 @@ -439,8 +438,10 @@ static int get_page(unsigned long page_n
    3.31                          page_type_count(page));
    3.32                  return(-1);
    3.33              }
    3.34 +            page->flags &= ~PG_type_mask;
    3.35              page->flags |= PGT_writeable_page;
    3.36          }
    3.37 +        page->flags &= ~PG_noflush;
    3.38          get_page_type(page);
    3.39      }
    3.40  
    3.41 @@ -501,10 +502,7 @@ static void put_page(unsigned long page_
    3.42             ((page_type_count(page) != 0) && 
    3.43              ((page->flags & PG_type_mask) == PGT_writeable_page)));
    3.44      if ( writeable && (put_page_type(page) == 0) )
    3.45 -    {
    3.46          tlb_flush[smp_processor_id()] = 1;
    3.47 -        page->flags &= ~PG_type_mask;
    3.48 -    }
    3.49      put_page_tot(page);
    3.50  }
    3.51  
     4.1 --- a/xen/common/page_alloc.c	Tue Mar 25 14:10:16 2003 +0000
     4.2 +++ b/xen/common/page_alloc.c	Thu Apr 03 18:58:55 2003 +0000
     4.3 @@ -110,7 +110,10 @@ static chunk_head_t  free_tail[FREELIST_
     4.4  #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
     4.5  
     4.6  
     4.7 -/* Initialise allocator, placing addresses [@min,@max] in free pool. */
     4.8 +/*
     4.9 + * Initialise allocator, placing addresses [@min,@max] in free pool.
    4.10 + * @min and @max are PHYSICAL addresses.
    4.11 + */
    4.12  void __init init_page_allocator(unsigned long min, unsigned long max)
    4.13  {
    4.14      int i;
    4.15 @@ -168,7 +171,21 @@ void __init init_page_allocator(unsigned
    4.16  }
    4.17  
    4.18  
    4.19 -/* Allocate 2^@order contiguous pages. */
    4.20 +/* Release a PHYSICAL address range to the allocator. */
    4.21 +void release_bytes_to_allocator(unsigned long min, unsigned long max)
    4.22 +{
    4.23 +    min = round_pgup  (min) + PAGE_OFFSET;
    4.24 +    max = round_pgdown(max) + PAGE_OFFSET;
    4.25 +
    4.26 +    while ( min < max )
    4.27 +    {
    4.28 +        __free_pages(min, 0);
    4.29 +        min += PAGE_SIZE;
    4.30 +    }
    4.31 +}
    4.32 +
    4.33 +
    4.34 +/* Allocate 2^@order contiguous pages. Returns a VIRTUAL address. */
    4.35  unsigned long __get_free_pages(int mask, int order)
    4.36  {
    4.37      int i, attempts = 0;
    4.38 @@ -233,7 +250,7 @@ retry:
    4.39  }
    4.40  
    4.41  
    4.42 -/* Free 2^@order pages at location @p. */
    4.43 +/* Free 2^@order pages at VIRTUAL address @p. */
    4.44  void __free_pages(unsigned long p, int order)
    4.45  {
    4.46      unsigned long size = 1 << (order + PAGE_SHIFT);
     5.1 --- a/xen/drivers/block/xen_block.c	Tue Mar 25 14:10:16 2003 +0000
     5.2 +++ b/xen/drivers/block/xen_block.c	Thu Apr 03 18:58:55 2003 +0000
     5.3 @@ -277,7 +277,8 @@ static int __buffer_is_valid(struct task
     5.4  
     5.5          /* If reading into the frame, the frame must be writeable. */
     5.6          if ( writeable_buffer &&
     5.7 -             ((page->flags & PG_type_mask) != PGT_writeable_page) )
     5.8 +             ((page->flags & PG_type_mask) != PGT_writeable_page) &&
     5.9 +             (page->type_count != 0) )
    5.10          {
    5.11              DPRINTK("non-writeable page passed for block read\n");
    5.12              goto out;
    5.13 @@ -301,7 +302,16 @@ static void __lock_buffer(unsigned long 
    5.14            pfn++ )
    5.15      {
    5.16          page = frame_table + pfn;
    5.17 -        if ( writeable_buffer ) get_page_type(page);
    5.18 +        if ( writeable_buffer )
    5.19 +        {
    5.20 +            if ( page->type_count == 0 )
    5.21 +            {
    5.22 +                page->flags &= ~PG_type_mask;
    5.23 +                /* NB. This ref alone won't cause a TLB flush. */
    5.24 +                page->flags |= PGT_writeable_page | PG_noflush;
    5.25 +            }
    5.26 +            get_page_type(page);
    5.27 +        }
    5.28          get_page_tot(page);
    5.29      }
    5.30  }
    5.31 @@ -320,8 +330,13 @@ static void unlock_buffer(struct task_st
    5.32            pfn++ )
    5.33      {
    5.34          page = frame_table + pfn;
    5.35 -        if ( writeable_buffer && (put_page_type(page) == 0) )
    5.36 -            page->flags &= ~PG_type_mask;
    5.37 +        if ( writeable_buffer &&
    5.38 +             (put_page_type(page) == 0) &&
    5.39 +             !(page->flags & PG_noflush) )
    5.40 +        {
    5.41 +            __flush_tlb();
    5.42 +        }
    5.43 +        page->flags &= ~PG_noflush;
    5.44          put_page_tot(page);
    5.45      }
    5.46      spin_unlock_irqrestore(&p->page_lock, flags);
     6.1 --- a/xen/include/xeno/config.h	Tue Mar 25 14:10:16 2003 +0000
     6.2 +++ b/xen/include/xeno/config.h	Thu Apr 03 18:58:55 2003 +0000
     6.3 @@ -141,6 +141,7 @@
     6.4  #define capable(_c) 0
     6.5  
     6.6  #ifndef __ASSEMBLY__
     6.7 +extern unsigned long _end; /* standard ELF symbol */
     6.8  extern void __out_of_line_bug(int line) __attribute__((noreturn));
     6.9  #define out_of_line_bug() __out_of_line_bug(__LINE__)
    6.10  #endif
     7.1 --- a/xen/include/xeno/mm.h	Tue Mar 25 14:10:16 2003 +0000
     7.2 +++ b/xen/include/xeno/mm.h	Thu Apr 03 18:58:55 2003 +0000
     7.3 @@ -35,6 +35,7 @@
     7.4   */
     7.5  
     7.6  void init_page_allocator(unsigned long min, unsigned long max);
     7.7 +void release_bytes_to_allocator(unsigned long min, unsigned long max);
     7.8  unsigned long __get_free_pages(int mask, int order);
     7.9  void __free_pages(unsigned long p, int order);
    7.10  #define get_free_page(_m) (__get_free_pages((_m),0))
    7.11 @@ -51,10 +52,6 @@ void __free_pages(unsigned long p, int o
    7.12   * with struct pfn_info and frame_table respectively. Boris Dragovic
    7.13   */
    7.14  
    7.15 -/*
    7.16 - * This is still fatter than I'd like. Do we need the count?
    7.17 - * Do we need the flags? The list at least seems req'd by slab.c.
    7.18 - */
    7.19  typedef struct pfn_info {
    7.20      struct list_head list;      /* ->mapping has some page lists. */
    7.21      unsigned long flags;        /* atomic flags. */
    7.22 @@ -100,6 +97,19 @@ typedef struct pfn_info {
    7.23  #define PGT_writeable_page  (7<<24) /* has writable mappings of this page? */
    7.24  #define PGT_net_rx_buf      (8<<24) /* this page has been pirated by the net code. */
    7.25  
    7.26 +/*
    7.27 + * This bit is sometimes set by Xen when it holds a writeable reference to a 
    7.28 + * page that shouldn't cause a TLB flush when it is dropped. For example, a 
    7.29 + * disk write to a page with initial type_count == 0, which returns to 0 after 
    7.30 + * the I/O. In this case, we'd normally flush the TLB because a writeable page 
    7.31 + * has just lost its mutually-exclusive type. But this isn't necessary here 
    7.32 + * because the writeable reference never made it into user-accessible TLB 
    7.33 + * (didn't make it into TLB at all, in fact).
    7.34 + * 
    7.35 + * This bit is obviously nuked in a few places, for safety.
    7.36 + */
    7.37 +#define PG_noflush          (1<<28)
    7.38 +
    7.39  #define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
    7.40  #define PageSetSlab(page)	set_bit(PG_slab, &(page)->flags)
    7.41  #define PageClearSlab(page)	clear_bit(PG_slab, &(page)->flags)
     8.1 --- a/xen/include/xeno/multiboot.h	Tue Mar 25 14:10:16 2003 +0000
     8.2 +++ b/xen/include/xeno/multiboot.h	Thu Apr 03 18:58:55 2003 +0000
     8.3 @@ -15,6 +15,9 @@
     8.4     along with this program; if not, write to the Free Software
     8.5     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
     8.6  
     8.7 +#ifndef __MULTIBOOT_H__
     8.8 +#define __MULTIBOOT_H__
     8.9 +
    8.10  #ifndef __ELF__
    8.11  #error "Build on a 32-bit ELF system"
    8.12  #endif
    8.13 @@ -79,3 +82,5 @@ typedef struct memory_map
    8.14    unsigned long length_high;
    8.15    unsigned long type;
    8.16  } memory_map_t;
    8.17 +
    8.18 +#endif /* __MULTIBOOT_H__ */
     9.1 --- a/xen/include/xeno/sched.h	Tue Mar 25 14:10:16 2003 +0000
     9.2 +++ b/xen/include/xeno/sched.h	Thu Apr 03 18:58:55 2003 +0000
     9.3 @@ -200,7 +200,9 @@ extern union task_union idle0_task_union
     9.4  extern struct task_struct first_task_struct;
     9.5  
     9.6  extern struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu);
     9.7 -extern int setup_guestos(struct task_struct *p, dom0_newdomain_t *params);
     9.8 +extern int setup_guestos(
     9.9 +    struct task_struct *p, dom0_newdomain_t *params,
    9.10 +    char *data_start, unsigned long data_len, char *cmdline);
    9.11  extern int final_setup_guestos(struct task_struct *p, dom_meminfo_t *);
    9.12  
    9.13  struct task_struct *find_domain_by_id(unsigned int dom);
    10.1 --- a/xen/net/dev.c	Tue Mar 25 14:10:16 2003 +0000
    10.2 +++ b/xen/net/dev.c	Thu Apr 03 18:58:55 2003 +0000
    10.3 @@ -519,7 +519,7 @@ void deliver_packet(struct sk_buff *skb,
    10.4          
    10.5      h_pfn->tot_count = h_pfn->type_count = 1;
    10.6      g_pfn->tot_count = g_pfn->type_count = 0;
    10.7 -    h_pfn->flags = g_pfn->flags & (~PG_type_mask);
    10.8 +    h_pfn->flags = g_pfn->flags & ~PG_type_mask;
    10.9          
   10.10      if (*g_pte & _PAGE_RW) h_pfn->flags |= PGT_writeable_page;
   10.11      g_pfn->flags = 0;