ia64/xen-unstable

changeset 15310:3d5f39c610ad

[XEN] Make the log-dirty paging code common and add HAP log-dirty support.
Signed-off-by: Wei Huang <wei.huang2@amd.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Mon Jun 11 14:35:52 2007 +0100 (2007-06-11)
parents f1c6de438b83
children 2c8c6ca1296b
files xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/io.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/mm.c xen/arch/x86/mm/hap/hap.c xen/arch/x86/mm/p2m.c xen/arch/x86/mm/paging.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/private.h xen/include/asm-x86/domain.h xen/include/asm-x86/grant_table.h xen/include/asm-x86/p2m.h xen/include/asm-x86/paging.h xen/include/asm-x86/shadow.h
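
The core of this changeset is a mode-independent log-dirty layer in xen/arch/x86/mm/paging.c: the dirty bitmap, stats and lock move into a new struct log_dirty_domain under d->arch.paging, and the shadow and HAP code each register enable/disable/clean callbacks through paging_log_dirty_init(). Below is a minimal, standalone C sketch of that wiring (not Xen code); the types are simplified stand-ins for the real structures and the callback bodies are illustrative only.

/*
 * Standalone sketch (not Xen code): models how the common log-dirty layer
 * dispatches to mode-specific callbacks registered via paging_log_dirty_init().
 * The structs and dummy callbacks are simplified stand-ins for illustration.
 */
#include <stdio.h>

struct domain;                          /* stand-in for Xen's struct domain */

struct log_dirty_domain {
    int  (*enable_log_dirty)(struct domain *d);
    int  (*disable_log_dirty)(struct domain *d);
    void (*clean_dirty_bitmap)(struct domain *d);
};

struct domain {
    struct log_dirty_domain log_dirty;  /* lives in d->arch.paging in Xen */
};

/* Generic layer: record the callbacks supplied by the paging mode. */
static void paging_log_dirty_init(struct domain *d,
                                  int  (*enable)(struct domain *),
                                  int  (*disable)(struct domain *),
                                  void (*clean)(struct domain *))
{
    d->log_dirty.enable_log_dirty = enable;
    d->log_dirty.disable_log_dirty = disable;
    d->log_dirty.clean_dirty_bitmap = clean;
}

/* Generic layer: the common entry point just calls through the registered hook. */
static int paging_log_dirty_enable(struct domain *d)
{
    return d->log_dirty.enable_log_dirty(d);
}

/* Mode-specific callbacks, e.g. what hap_enable_log_dirty() does for HAP. */
static int hap_enable_log_dirty(struct domain *d)
{
    (void)d;
    puts("HAP: write-protect guest memory via p2m_set_flags_global()");
    return 0;
}
static int hap_disable_log_dirty(struct domain *d) { (void)d; return 0; }
static void hap_clean_dirty_bitmap(struct domain *d) { (void)d; }

int main(void)
{
    struct domain d = {{ 0 }};
    /* hap_enable()/shadow_enable() register their callbacks like this. */
    paging_log_dirty_init(&d, hap_enable_log_dirty,
                          hap_disable_log_dirty, hap_clean_dirty_bitmap);
    return paging_log_dirty_enable(&d);
}
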
line diff
     1.1 --- a/xen/arch/x86/hvm/hvm.c	Mon Jun 11 11:37:10 2007 +0100
     1.2 +++ b/xen/arch/x86/hvm/hvm.c	Mon Jun 11 14:35:52 2007 +0100
     1.3 @@ -568,7 +568,7 @@ static int __hvm_copy(void *buf, paddr_t
     1.4          if ( dir )
     1.5          {
     1.6              memcpy(p, buf, count); /* dir == TRUE:  *to* guest */
     1.7 -            mark_dirty(current->domain, mfn);
     1.8 +            paging_mark_dirty(current->domain, mfn);
     1.9          }
    1.10          else
    1.11              memcpy(buf, p, count); /* dir == FALSE: *from guest */
     2.1 --- a/xen/arch/x86/hvm/io.c	Mon Jun 11 11:37:10 2007 +0100
     2.2 +++ b/xen/arch/x86/hvm/io.c	Mon Jun 11 14:35:52 2007 +0100
     2.3 @@ -865,7 +865,7 @@ void hvm_io_assist(void)
     2.4      if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
     2.5      {
     2.6          gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
     2.7 -        mark_dirty(d, gmfn);
     2.8 +        paging_mark_dirty(d, gmfn);
     2.9      }
    2.10  
    2.11   out:
     3.1 --- a/xen/arch/x86/hvm/svm/svm.c	Mon Jun 11 11:37:10 2007 +0100
     3.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Mon Jun 11 14:35:52 2007 +0100
     3.3 @@ -1027,8 +1027,8 @@ static int svm_do_nested_pgfault(paddr_t
     3.4          return 1;
     3.5      }
     3.6  
     3.7 -    /* We should not reach here. Otherwise, P2M table is not correct.*/
     3.8 -    return 0;
     3.9 +    paging_mark_dirty(current->domain, get_mfn_from_gpfn(gpa >> PAGE_SHIFT));
    3.10 +    return p2m_set_flags(current->domain, gpa, __PAGE_HYPERVISOR|_PAGE_USER);
    3.11  }
    3.12  
    3.13  static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
     4.1 --- a/xen/arch/x86/mm.c	Mon Jun 11 11:37:10 2007 +0100
     4.2 +++ b/xen/arch/x86/mm.c	Mon Jun 11 14:35:52 2007 +0100
     4.3 @@ -1556,7 +1556,7 @@ int alloc_page_type(struct page_info *pa
     4.4  
     4.5      /* A page table is dirtied when its type count becomes non-zero. */
     4.6      if ( likely(owner != NULL) )
     4.7 -        mark_dirty(owner, page_to_mfn(page));
     4.8 +        paging_mark_dirty(owner, page_to_mfn(page));
     4.9  
    4.10      switch ( type & PGT_type_mask )
    4.11      {
    4.12 @@ -1602,7 +1602,7 @@ void free_page_type(struct page_info *pa
    4.13          if ( unlikely(paging_mode_enabled(owner)) )
    4.14          {
    4.15              /* A page table is dirtied when its type count becomes zero. */
    4.16 -            mark_dirty(owner, page_to_mfn(page));
    4.17 +            paging_mark_dirty(owner, page_to_mfn(page));
    4.18  
    4.19              if ( shadow_mode_refcounts(owner) )
    4.20                  return;
    4.21 @@ -2057,7 +2057,7 @@ int do_mmuext_op(
    4.22              }
    4.23  
    4.24              /* A page is dirtied when its pin status is set. */
    4.25 -            mark_dirty(d, mfn);
    4.26 +            paging_mark_dirty(d, mfn);
    4.27             
    4.28              /* We can race domain destruction (domain_relinquish_resources). */
    4.29              if ( unlikely(this_cpu(percpu_mm_info).foreign != NULL) )
    4.30 @@ -2089,7 +2089,7 @@ int do_mmuext_op(
    4.31                  put_page_and_type(page);
    4.32                  put_page(page);
    4.33                  /* A page is dirtied when its pin status is cleared. */
    4.34 -                mark_dirty(d, mfn);
    4.35 +                paging_mark_dirty(d, mfn);
    4.36              }
    4.37              else
    4.38              {
    4.39 @@ -2424,7 +2424,7 @@ int do_mmu_update(
    4.40              set_gpfn_from_mfn(mfn, gpfn);
    4.41              okay = 1;
    4.42  
    4.43 -            mark_dirty(FOREIGNDOM, mfn);
    4.44 +            paging_mark_dirty(FOREIGNDOM, mfn);
    4.45  
    4.46              put_page(mfn_to_page(mfn));
    4.47              break;
    4.48 @@ -3005,7 +3005,7 @@ long do_update_descriptor(u64 pa, u64 de
    4.49          break;
    4.50      }
    4.51  
    4.52 -    mark_dirty(dom, mfn);
    4.53 +    paging_mark_dirty(dom, mfn);
    4.54  
    4.55      /* All is good so make the update. */
    4.56      gdt_pent = map_domain_page(mfn);
     5.1 --- a/xen/arch/x86/mm/hap/hap.c	Mon Jun 11 11:37:10 2007 +0100
     5.2 +++ b/xen/arch/x86/mm/hap/hap.c	Mon Jun 11 14:35:52 2007 +0100
     5.3 @@ -50,6 +50,40 @@
     5.4  #define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
     5.5  
     5.6  /************************************************/
     5.7 +/*            HAP LOG DIRTY SUPPORT             */
     5.8 +/************************************************/
      5.9 +/* hap code to call when log_dirty is enabled. Return 0 if no problem is found. */
    5.10 +int hap_enable_log_dirty(struct domain *d)
    5.11 +{
    5.12 +    hap_lock(d);
    5.13 +    /* turn on PG_log_dirty bit in paging mode */
    5.14 +    d->arch.paging.mode |= PG_log_dirty;
    5.15 +    /* set l1e entries of P2M table to NOT_WRITABLE. */
    5.16 +    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
    5.17 +    flush_tlb_all_pge();
    5.18 +    hap_unlock(d);
    5.19 +
    5.20 +    return 0;
    5.21 +}
    5.22 +
    5.23 +int hap_disable_log_dirty(struct domain *d)
    5.24 +{
    5.25 +    hap_lock(d);
    5.26 +    d->arch.paging.mode &= ~PG_log_dirty;
    5.27 +    /* set l1e entries of P2M table with normal mode */
    5.28 +    p2m_set_flags_global(d, __PAGE_HYPERVISOR|_PAGE_USER);
    5.29 +    hap_unlock(d);
    5.30 +    
    5.31 +    return 1;
    5.32 +}
    5.33 +
    5.34 +void hap_clean_dirty_bitmap(struct domain *d)
    5.35 +{
     5.36 +    /* mark physical memory as NOT_WRITABLE and flush the TLB */
    5.37 +    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
    5.38 +    flush_tlb_all_pge();
    5.39 +}
    5.40 +/************************************************/
    5.41  /*             HAP SUPPORT FUNCTIONS            */
    5.42  /************************************************/
    5.43  mfn_t hap_alloc(struct domain *d)
    5.44 @@ -421,6 +455,10 @@ int hap_enable(struct domain *d, u32 mod
    5.45          }
    5.46      }
    5.47  
    5.48 +    /* initialize log dirty here */
    5.49 +    paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty,
    5.50 +                          hap_clean_dirty_bitmap);
    5.51 +
    5.52      /* allocate P2m table */
    5.53      if ( mode & PG_translate ) {
    5.54          rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page);
    5.55 @@ -498,11 +536,6 @@ int hap_domctl(struct domain *d, xen_dom
    5.56  
    5.57      HERE_I_AM;
    5.58  
    5.59 -    if ( unlikely(d == current->domain) ) {
    5.60 -        gdprintk(XENLOG_INFO, "Don't try to do a hap op on yourself!\n");
    5.61 -        return -EINVAL;
    5.62 -    }
    5.63 -    
    5.64      switch ( sc->op ) {
    5.65      case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
    5.66          hap_lock(d);
    5.67 @@ -669,7 +702,16 @@ void
    5.68  hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
    5.69                      l1_pgentry_t new, unsigned int level)
    5.70  {
    5.71 -    hap_lock(v->domain);
    5.72 +    int do_locking;
    5.73 +
     5.74 +    /* This function can be called from two directions (P2M and log dirty). We
     5.75 +     * need to check whether the lock is already held by the caller.
    5.76 +     */
    5.77 +    do_locking = !hap_locked_by_me(v->domain);
    5.78 +
    5.79 +    if ( do_locking )
    5.80 +        hap_lock(v->domain);
    5.81 +
    5.82      safe_write_pte(p, new);
    5.83  #if CONFIG_PAGING_LEVELS == 3
    5.84      /* install P2M in monitor table for PAE Xen */
    5.85 @@ -680,7 +722,9 @@ hap_write_p2m_entry(struct vcpu *v, unsi
    5.86  	
    5.87      }
    5.88  #endif
    5.89 -    hap_unlock(v->domain);
    5.90 +    
    5.91 +    if ( do_locking )
    5.92 +        hap_unlock(v->domain);
    5.93  }
    5.94  
    5.95  /* Entry points into this mode of the hap code. */
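
For HAP, log-dirty works by dropping write permission on the whole P2M when tracking starts or the bitmap is cleaned (hap_enable_log_dirty / hap_clean_dirty_bitmap above) and letting the nested page fault handler record the dirty page and restore write access (the svm_do_nested_pgfault() change earlier in this diff). A standalone sketch of that cycle follows (not Xen code); the flag array and bitmap are simplified stand-ins.

/*
 * Standalone sketch (not Xen code) of the HAP log-dirty cycle: enabling
 * log-dirty drops write permission on every P2M entry, and each nested-page-
 * fault write then marks the page dirty and restores write access, as
 * svm_do_nested_pgfault() now does via paging_mark_dirty()/p2m_set_flags().
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_PAGES 8

static bool writable[NR_PAGES];   /* stands in for _PAGE_RW in the P2M l1es */
static bool dirty[NR_PAGES];      /* stands in for the log-dirty bitmap */

/* hap_enable_log_dirty()/hap_clean_dirty_bitmap(): write-protect everything. */
static void write_protect_all(void)
{
    for (int gfn = 0; gfn < NR_PAGES; gfn++)
        writable[gfn] = false;
}

/* svm_do_nested_pgfault() path: record the dirty page, re-grant write access. */
static void nested_write_fault(int gfn)
{
    dirty[gfn] = true;            /* paging_mark_dirty() */
    writable[gfn] = true;         /* p2m_set_flags()     */
}

int main(void)
{
    write_protect_all();          /* log-dirty enabled     */
    nested_write_fault(3);        /* guest writes to gfn 3 */
    nested_write_fault(5);        /* guest writes to gfn 5 */

    for (int gfn = 0; gfn < NR_PAGES; gfn++)
        if (dirty[gfn])
            printf("gfn %d dirty\n", gfn);   /* toolstack peeks the bitmap */
    return 0;
}
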
     6.1 --- a/xen/arch/x86/mm/p2m.c	Mon Jun 11 11:37:10 2007 +0100
     6.2 +++ b/xen/arch/x86/mm/p2m.c	Mon Jun 11 14:35:52 2007 +0100
     6.3 @@ -169,7 +169,7 @@ p2m_next_level(struct domain *d, mfn_t *
     6.4  
     6.5  // Returns 0 on error (out of memory)
     6.6  static int
     6.7 -set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
     6.8 +set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, u32 l1e_flags)
     6.9  {
    6.10      // XXX -- this might be able to be faster iff current->domain == d
    6.11      mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
    6.12 @@ -213,7 +213,7 @@ set_p2m_entry(struct domain *d, unsigned
    6.13          d->arch.p2m.max_mapped_pfn = gfn;
    6.14  
    6.15      if ( mfn_valid(mfn) )
    6.16 -        entry_content = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
    6.17 +        entry_content = l1e_from_pfn(mfn_x(mfn), l1e_flags);
    6.18      else
    6.19          entry_content = l1e_empty();
    6.20  
    6.21 @@ -278,7 +278,7 @@ int p2m_alloc_table(struct domain *d,
    6.22          p2m_unlock(d);
    6.23          return -ENOMEM;
    6.24      }
    6.25 -list_add_tail(&p2m_top->list, &d->arch.p2m.pages);
    6.26 +    list_add_tail(&p2m_top->list, &d->arch.p2m.pages);
    6.27  
    6.28      p2m_top->count_info = 1;
    6.29      p2m_top->u.inuse.type_info = 
    6.30 @@ -297,8 +297,8 @@ list_add_tail(&p2m_top->list, &d->arch.p
    6.31   
    6.32      /* Initialise physmap tables for slot zero. Other code assumes this. */
    6.33      gfn = 0;
    6.34 -mfn = _mfn(INVALID_MFN);
    6.35 -    if ( !set_p2m_entry(d, gfn, mfn) )
    6.36 +    mfn = _mfn(INVALID_MFN);
    6.37 +    if ( !set_p2m_entry(d, gfn, mfn, __PAGE_HYPERVISOR|_PAGE_USER) )
    6.38          goto error;
    6.39  
    6.40      for ( entry = d->page_list.next;
    6.41 @@ -316,7 +316,7 @@ mfn = _mfn(INVALID_MFN);
    6.42              (gfn != 0x55555555L)
    6.43  #endif
    6.44               && gfn != INVALID_M2P_ENTRY
    6.45 -             && !set_p2m_entry(d, gfn, mfn) )
    6.46 +             && !set_p2m_entry(d, gfn, mfn, __PAGE_HYPERVISOR|_PAGE_USER) )
    6.47              goto error;
    6.48      }
    6.49  
    6.50 @@ -497,7 +497,7 @@ static void audit_p2m(struct domain *d)
    6.51              /* This m2p entry is stale: the domain has another frame in
    6.52               * this physical slot.  No great disaster, but for neatness,
    6.53               * blow away the m2p entry. */ 
    6.54 -            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
    6.55 +            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY, __PAGE_HYPERVISOR|_PAGE_USER);
    6.56          }
    6.57  
    6.58          if ( test_linear && (gfn <= d->arch.p2m.max_mapped_pfn) )
    6.59 @@ -626,7 +626,7 @@ p2m_remove_page(struct domain *d, unsign
    6.60      ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
    6.61      //ASSERT(mfn_to_gfn(d, mfn) == gfn);
    6.62  
    6.63 -    set_p2m_entry(d, gfn, _mfn(INVALID_MFN));
    6.64 +    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
    6.65      set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
    6.66  }
    6.67  
    6.68 @@ -659,7 +659,7 @@ guest_physmap_add_page(struct domain *d,
    6.69      omfn = gfn_to_mfn(d, gfn);
    6.70      if ( mfn_valid(omfn) )
    6.71      {
    6.72 -        set_p2m_entry(d, gfn, _mfn(INVALID_MFN));
    6.73 +        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
    6.74          set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
    6.75      }
    6.76  
    6.77 @@ -685,13 +685,129 @@ guest_physmap_add_page(struct domain *d,
    6.78          }
    6.79      }
    6.80  
    6.81 -    set_p2m_entry(d, gfn, _mfn(mfn));
    6.82 +    set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
    6.83      set_gpfn_from_mfn(mfn, gfn);
    6.84  
    6.85      audit_p2m(d);
    6.86      p2m_unlock(d);
    6.87  }
    6.88  
     6.89 +/* This function goes through the P2M table and modifies the l1e flags of all
     6.90 + * pages. Note that the physical base address in each l1e is left intact. This
     6.91 + * can be used for special purposes, such as marking physical memory as NOT
     6.92 + * WRITABLE to track dirty pages during live migration.
    6.93 + */
    6.94 +void p2m_set_flags_global(struct domain *d, u32 l1e_flags)
    6.95 +{
    6.96 +    unsigned long mfn, gfn;
    6.97 +    l1_pgentry_t l1e_content;
    6.98 +    l1_pgentry_t *l1e;
    6.99 +    l2_pgentry_t *l2e;
   6.100 +    int i1, i2;
   6.101 +#if CONFIG_PAGING_LEVELS >= 3
   6.102 +    l3_pgentry_t *l3e;
   6.103 +    int i3;
   6.104 +#if CONFIG_PAGING_LEVELS == 4
   6.105 +    l4_pgentry_t *l4e;
   6.106 +    int i4;
   6.107 +#endif /* CONFIG_PAGING_LEVELS == 4 */
   6.108 +#endif /* CONFIG_PAGING_LEVELS >= 3 */
   6.109 +    
   6.110 +    if ( !paging_mode_translate(d) )
   6.111 +        return;
   6.112 + 
   6.113 +    if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
   6.114 +        return;
   6.115 +
   6.116 +    p2m_lock(d);
   6.117 +        
   6.118 +#if CONFIG_PAGING_LEVELS == 4
   6.119 +    l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
   6.120 +#elif CONFIG_PAGING_LEVELS == 3
   6.121 +    l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
   6.122 +#else /* CONFIG_PAGING_LEVELS == 2 */
   6.123 +    l2e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
   6.124 +#endif
   6.125 +
   6.126 +#if CONFIG_PAGING_LEVELS >= 3
   6.127 +#if CONFIG_PAGING_LEVELS >= 4
   6.128 +    for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) 
   6.129 +    {
   6.130 +	if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
   6.131 +	{
   6.132 +	    continue;
   6.133 +	}
   6.134 +	l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4]))));
   6.135 +#endif /* now at levels 3 or 4... */
   6.136 +	for ( i3 = 0; 
   6.137 +	      i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); 
   6.138 +	      i3++ )
   6.139 +	{
   6.140 +	    if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
   6.141 +	    {
   6.142 +		continue;
   6.143 +	    }
   6.144 +	    l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
   6.145 +#endif /* all levels... */
   6.146 +	    for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
   6.147 +	    {
   6.148 +		if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
   6.149 +		{
   6.150 +		    continue;
   6.151 +		}
   6.152 +		l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
   6.153 +		
   6.154 +		for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
   6.155 +		{
   6.156 +		    if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
   6.157 +			continue;
   6.158 +		    mfn = l1e_get_pfn(l1e[i1]);
   6.159 +		    gfn = get_gpfn_from_mfn(mfn);
    6.160 +		    /* create a new l1e entry using l1e_flags */
   6.161 +		    l1e_content = l1e_from_pfn(mfn, l1e_flags);
   6.162 +		    paging_write_p2m_entry(d, gfn, &l1e[i1], l1e_content, 1);
   6.163 +		}
   6.164 +		unmap_domain_page(l1e);
   6.165 +	    }
   6.166 +#if CONFIG_PAGING_LEVELS >= 3
   6.167 +	    unmap_domain_page(l2e);
   6.168 +	}
   6.169 +#if CONFIG_PAGING_LEVELS >= 4
   6.170 +	unmap_domain_page(l3e);
   6.171 +    }
   6.172 +#endif
   6.173 +#endif
   6.174 +
   6.175 +#if CONFIG_PAGING_LEVELS == 4
   6.176 +    unmap_domain_page(l4e);
   6.177 +#elif CONFIG_PAGING_LEVELS == 3
   6.178 +    unmap_domain_page(l3e);
   6.179 +#else /* CONFIG_PAGING_LEVELS == 2 */
   6.180 +    unmap_domain_page(l2e);
   6.181 +#endif
   6.182 +
   6.183 +    p2m_unlock(d);
   6.184 +}
   6.185 +
    6.186 +/* This function walks the P2M table and modifies the l1e flags of a specific
    6.187 + * gpa.
   6.188 + */
   6.189 +int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
   6.190 +{
   6.191 +    unsigned long gfn;
   6.192 +    mfn_t mfn;
   6.193 +
   6.194 +    p2m_lock(d);
   6.195 +
   6.196 +    gfn = gpa >> PAGE_SHIFT;
   6.197 +    mfn = gfn_to_mfn(d, gfn);
   6.198 +    if ( mfn_valid(mfn) )
   6.199 +        set_p2m_entry(d, gfn, mfn, l1e_flags);
   6.200 +    
   6.201 +    p2m_unlock(d);
   6.202 +
   6.203 +    return 1;
   6.204 +}
   6.205  
   6.206  /*
   6.207   * Local variables:
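
The preprocessor cases in p2m_set_flags_global() can obscure the underlying walk, which is simply: visit every present leaf entry, keep its frame number, and rewrite only its flags. Here is a standalone sketch of that idea (not Xen code) using a toy two-level table; the entry encoding (frame << 12 | flags) is a simplified stand-in for the real l1e/l2e helpers.

/*
 * Standalone sketch (not Xen code) of the walk performed by
 * p2m_set_flags_global(), flattened to two levels: keep each present leaf's
 * frame number and rewrite only its flags.
 */
#include <stdint.h>
#include <stdio.h>

#define ENTRIES        4
#define FLAG_PRESENT   0x1u
#define FLAG_RW        0x2u

typedef uint64_t pte_t;

static pte_t make_entry(uint64_t frame, uint32_t flags) { return (frame << 12) | flags; }
static uint64_t entry_frame(pte_t e) { return e >> 12; }
static uint32_t entry_flags(pte_t e) { return (uint32_t)(e & 0xfff); }

/* Rewrite the flags of every present leaf, preserving the frame number. */
static void set_flags_global(pte_t l2[ENTRIES], pte_t l1s[ENTRIES][ENTRIES],
                             uint32_t new_flags)
{
    for (int i2 = 0; i2 < ENTRIES; i2++) {
        if (!(entry_flags(l2[i2]) & FLAG_PRESENT))
            continue;
        for (int i1 = 0; i1 < ENTRIES; i1++) {
            pte_t e = l1s[i2][i1];
            if (!(entry_flags(e) & FLAG_PRESENT))
                continue;
            l1s[i2][i1] = make_entry(entry_frame(e), new_flags);
        }
    }
}

int main(void)
{
    pte_t l2[ENTRIES] = { 0 };
    pte_t l1s[ENTRIES][ENTRIES] = { { 0 } };

    l2[0] = make_entry(1, FLAG_PRESENT);                 /* l2 slot 0 -> an l1 page    */
    l1s[0][0] = make_entry(7, FLAG_PRESENT | FLAG_RW);   /* gfn 0 -> frame 7, writable */

    set_flags_global(l2, l1s, FLAG_PRESENT);             /* drop write access everywhere */
    printf("gfn 0 -> frame %llu, flags %#x\n",
           (unsigned long long)entry_frame(l1s[0][0]), entry_flags(l1s[0][0]));
    return 0;
}
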
     7.1 --- a/xen/arch/x86/mm/paging.c	Mon Jun 11 11:37:10 2007 +0100
     7.2 +++ b/xen/arch/x86/mm/paging.c	Mon Jun 11 14:35:52 2007 +0100
     7.3 @@ -25,6 +25,7 @@
     7.4  #include <asm/shadow.h>
     7.5  #include <asm/p2m.h>
     7.6  #include <asm/hap.h>
     7.7 +#include <asm/guest_access.h>
     7.8  
     7.9  /* Xen command-line option to enable hardware-assisted paging */
    7.10  int opt_hap_enabled;
    7.11 @@ -41,7 +42,279 @@ boolean_param("hap", opt_hap_enabled);
    7.12              debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \
    7.13      } while (0)
    7.14  
    7.15 +/************************************************/
    7.16 +/*              LOG DIRTY SUPPORT               */
    7.17 +/************************************************/
    7.18 +/* Override macros from asm/page.h to make them work with mfn_t */
    7.19 +#undef mfn_to_page
    7.20 +#define mfn_to_page(_m) (frame_table + mfn_x(_m))
    7.21 +#undef mfn_valid
    7.22 +#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
    7.23 +#undef page_to_mfn
    7.24 +#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
    7.25  
    7.26 +#define log_dirty_lock_init(_d)                                   \
    7.27 +    do {                                                          \
    7.28 +        spin_lock_init(&(_d)->arch.paging.log_dirty.lock);        \
    7.29 +        (_d)->arch.paging.log_dirty.locker = -1;                  \
    7.30 +        (_d)->arch.paging.log_dirty.locker_function = "nobody";   \
    7.31 +    } while (0)
    7.32 +
    7.33 +#define log_dirty_lock(_d)                                                   \
    7.34 +    do {                                                                     \
    7.35 +        if (unlikely((_d)->arch.paging.log_dirty.locker==current->processor))\
    7.36 +        {                                                                    \
    7.37 +            printk("Error: paging log dirty lock held by %s\n",              \
    7.38 +                   (_d)->arch.paging.log_dirty.locker_function);             \
    7.39 +            BUG();                                                           \
    7.40 +        }                                                                    \
    7.41 +        spin_lock(&(_d)->arch.paging.log_dirty.lock);                        \
    7.42 +        ASSERT((_d)->arch.paging.log_dirty.locker == -1);                    \
    7.43 +        (_d)->arch.paging.log_dirty.locker = current->processor;             \
    7.44 +        (_d)->arch.paging.log_dirty.locker_function = __func__;              \
    7.45 +    } while (0)
    7.46 +
    7.47 +#define log_dirty_unlock(_d)                                              \
    7.48 +    do {                                                                  \
    7.49 +        ASSERT((_d)->arch.paging.log_dirty.locker == current->processor); \
    7.50 +        (_d)->arch.paging.log_dirty.locker = -1;                          \
    7.51 +        (_d)->arch.paging.log_dirty.locker_function = "nobody";           \
    7.52 +        spin_unlock(&(_d)->arch.paging.log_dirty.lock);                   \
    7.53 +    } while (0)
    7.54 +
    7.55 +/* allocate bitmap resources for log dirty */
    7.56 +int paging_alloc_log_dirty_bitmap(struct domain *d)
    7.57 +{
    7.58 +    ASSERT(d->arch.paging.log_dirty.bitmap == NULL);
    7.59 +    d->arch.paging.log_dirty.bitmap_size =
    7.60 +        (domain_get_maximum_gpfn(d) + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
    7.61 +    d->arch.paging.log_dirty.bitmap = 
    7.62 +        xmalloc_array(unsigned long,
    7.63 +                      d->arch.paging.log_dirty.bitmap_size / BITS_PER_LONG);
    7.64 +    if ( d->arch.paging.log_dirty.bitmap == NULL )
    7.65 +    {
    7.66 +        d->arch.paging.log_dirty.bitmap_size = 0;
    7.67 +        return -ENOMEM;
    7.68 +    }
    7.69 +    memset(d->arch.paging.log_dirty.bitmap, 0,
    7.70 +           d->arch.paging.log_dirty.bitmap_size/8);
    7.71 +
    7.72 +    return 0;
    7.73 +}
    7.74 +
    7.75 +/* free bitmap resources */
    7.76 +void paging_free_log_dirty_bitmap(struct domain *d)
    7.77 +{
    7.78 +    d->arch.paging.log_dirty.bitmap_size = 0;
    7.79 +    if ( d->arch.paging.log_dirty.bitmap )
    7.80 +    {
    7.81 +        xfree(d->arch.paging.log_dirty.bitmap);
    7.82 +        d->arch.paging.log_dirty.bitmap = NULL;
    7.83 +    }
    7.84 +}
    7.85 +
    7.86 +int paging_log_dirty_enable(struct domain *d)
    7.87 +{
    7.88 +    int ret;
    7.89 +
    7.90 +    domain_pause(d);
    7.91 +    log_dirty_lock(d);
    7.92 +
    7.93 +    if ( paging_mode_log_dirty(d) )
    7.94 +    {
    7.95 +        ret = -EINVAL;
    7.96 +        goto out;
    7.97 +    }
    7.98 +
    7.99 +    ret = paging_alloc_log_dirty_bitmap(d);
   7.100 +    if ( ret != 0 )
   7.101 +    {
   7.102 +        paging_free_log_dirty_bitmap(d);
   7.103 +        goto out;
   7.104 +    }
   7.105 +
   7.106 +    ret = d->arch.paging.log_dirty.enable_log_dirty(d);
   7.107 +    if ( ret != 0 )
   7.108 +        paging_free_log_dirty_bitmap(d);
   7.109 +
   7.110 + out:
   7.111 +    log_dirty_unlock(d);
   7.112 +    domain_unpause(d);
   7.113 +    return ret;
   7.114 +}
   7.115 +
   7.116 +int paging_log_dirty_disable(struct domain *d)
   7.117 +{
   7.118 +    int ret;
   7.119 +
   7.120 +    domain_pause(d);
   7.121 +    log_dirty_lock(d);
   7.122 +    ret = d->arch.paging.log_dirty.disable_log_dirty(d);
   7.123 +    if ( !paging_mode_log_dirty(d) )
   7.124 +        paging_free_log_dirty_bitmap(d);
   7.125 +    log_dirty_unlock(d);
   7.126 +    domain_unpause(d);
   7.127 +
   7.128 +    return ret;
   7.129 +}
   7.130 +
   7.131 +/* Mark a page as dirty */
   7.132 +void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
   7.133 +{
   7.134 +    unsigned long pfn;
   7.135 +    mfn_t gmfn;
   7.136 +
   7.137 +    gmfn = _mfn(guest_mfn);
   7.138 +
   7.139 +    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) )
   7.140 +        return;
   7.141 +
   7.142 +    log_dirty_lock(d);
   7.143 +
   7.144 +    ASSERT(d->arch.paging.log_dirty.bitmap != NULL);
   7.145 +
   7.146 +    /* We /really/ mean PFN here, even for non-translated guests. */
   7.147 +    pfn = get_gpfn_from_mfn(mfn_x(gmfn));
   7.148 +
   7.149 +    /*
   7.150 +     * Values with the MSB set denote MFNs that aren't really part of the 
   7.151 +     * domain's pseudo-physical memory map (e.g., the shared info frame).
   7.152 +     * Nothing to do here...
   7.153 +     */
   7.154 +    if ( unlikely(!VALID_M2P(pfn)) )
   7.155 +        return;
   7.156 +
   7.157 +    if ( likely(pfn < d->arch.paging.log_dirty.bitmap_size) ) 
   7.158 +    { 
   7.159 +        if ( !__test_and_set_bit(pfn, d->arch.paging.log_dirty.bitmap) )
   7.160 +        {
   7.161 +            PAGING_DEBUG(LOGDIRTY, 
   7.162 +                         "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
   7.163 +                         mfn_x(gmfn), pfn, d->domain_id);
   7.164 +            d->arch.paging.log_dirty.dirty_count++;
   7.165 +        }
   7.166 +    }
   7.167 +    else
   7.168 +    {
   7.169 +        PAGING_PRINTK("mark_dirty OOR! "
   7.170 +                      "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
   7.171 +                      "owner=%d c=%08x t=%" PRtype_info "\n",
   7.172 +                      mfn_x(gmfn), 
   7.173 +                      pfn, 
   7.174 +                      d->arch.paging.log_dirty.bitmap_size,
   7.175 +                      d->domain_id,
   7.176 +                      (page_get_owner(mfn_to_page(gmfn))
   7.177 +                       ? page_get_owner(mfn_to_page(gmfn))->domain_id
   7.178 +                       : -1),
   7.179 +                      mfn_to_page(gmfn)->count_info, 
   7.180 +                      mfn_to_page(gmfn)->u.inuse.type_info);
   7.181 +    }
   7.182 +    
   7.183 +    log_dirty_unlock(d);
   7.184 +}
   7.185 +
   7.186 +/* Read a domain's log-dirty bitmap and stats.  If the operation is a CLEAN, 
   7.187 + * clear the bitmap and stats as well. */
   7.188 +int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
   7.189 +{
   7.190 +    int i, rv = 0, clean = 0, peek = 1;
   7.191 +
   7.192 +    domain_pause(d);
   7.193 +    log_dirty_lock(d);
   7.194 +
   7.195 +    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
   7.196 +
   7.197 +    PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n", 
   7.198 +                 (clean) ? "clean" : "peek",
   7.199 +                 d->domain_id,
   7.200 +                 d->arch.paging.log_dirty.fault_count, 
   7.201 +                 d->arch.paging.log_dirty.dirty_count);
   7.202 +
   7.203 +    sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
   7.204 +    sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;
   7.205 +    
   7.206 +    if ( clean )
   7.207 +    {
   7.208 +        d->arch.paging.log_dirty.fault_count = 0;
   7.209 +        d->arch.paging.log_dirty.dirty_count = 0;
   7.210 +
    7.211 +        /* We also need to call the clean_dirty_bitmap() function of the
    7.212 +         * specific paging mode (shadow or hap).
   7.213 +         */
   7.214 +        d->arch.paging.log_dirty.clean_dirty_bitmap(d);
   7.215 +    }
   7.216 +
   7.217 +    if ( guest_handle_is_null(sc->dirty_bitmap) )
   7.218 +        /* caller may have wanted just to clean the state or access stats. */
   7.219 +        peek = 0;
   7.220 +
   7.221 +    if ( (peek || clean) && (d->arch.paging.log_dirty.bitmap == NULL) )
   7.222 +    {
   7.223 +        rv = -EINVAL; /* perhaps should be ENOMEM? */
   7.224 +        goto out;
   7.225 +    }
   7.226 + 
   7.227 +    if ( sc->pages > d->arch.paging.log_dirty.bitmap_size )
   7.228 +        sc->pages = d->arch.paging.log_dirty.bitmap_size;
   7.229 +
   7.230 +#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
   7.231 +    for ( i = 0; i < sc->pages; i += CHUNK )
   7.232 +    {
   7.233 +        int bytes = ((((sc->pages - i) > CHUNK)
   7.234 +                      ? CHUNK
   7.235 +                      : (sc->pages - i)) + 7) / 8;
   7.236 +
   7.237 +        if ( likely(peek) )
   7.238 +        {
   7.239 +            if ( copy_to_guest_offset(
   7.240 +                sc->dirty_bitmap, i/8,
   7.241 +                (uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), bytes) )
   7.242 +            {
   7.243 +                rv = -EFAULT;
   7.244 +                goto out;
   7.245 +            }
   7.246 +        }
   7.247 +
   7.248 +        if ( clean )
   7.249 +            memset((uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), 0, bytes);
   7.250 +    }
   7.251 +#undef CHUNK
   7.252 +
   7.253 + out:
   7.254 +    log_dirty_unlock(d);
   7.255 +    domain_unpause(d);
   7.256 +    return rv;
   7.257 +}
   7.258 +
   7.259 +
   7.260 +/* Note that this function takes three function pointers. Callers must supply
    7.261 + * these functions for the log dirty code to call. This function is usually
    7.262 + * invoked when paging is enabled. Check shadow_enable() and hap_enable() for
   7.263 + * reference.
   7.264 + */
   7.265 +void paging_log_dirty_init(struct domain *d,
   7.266 +                           int    (*enable_log_dirty)(struct domain *d),
   7.267 +                           int    (*disable_log_dirty)(struct domain *d),
   7.268 +                           void   (*clean_dirty_bitmap)(struct domain *d))
   7.269 +{
   7.270 +    /* We initialize log dirty lock first */
   7.271 +    log_dirty_lock_init(d);
   7.272 +    
   7.273 +    d->arch.paging.log_dirty.enable_log_dirty = enable_log_dirty;
   7.274 +    d->arch.paging.log_dirty.disable_log_dirty = disable_log_dirty;
   7.275 +    d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
   7.276 +}
   7.277 +
    7.278 +/* This function frees log dirty bitmap resources. */
   7.279 +void paging_log_dirty_teardown(struct domain*d)
   7.280 +{
   7.281 +    log_dirty_lock(d);
   7.282 +    paging_free_log_dirty_bitmap(d);
   7.283 +    log_dirty_unlock(d);
   7.284 +}
   7.285 +/************************************************/
   7.286 +/*           CODE FOR PAGING SUPPORT            */
   7.287 +/************************************************/
   7.288  /* Domain paging struct initialization. */
   7.289  void paging_domain_init(struct domain *d)
   7.290  {
   7.291 @@ -65,16 +338,68 @@ void paging_vcpu_init(struct vcpu *v)
   7.292  int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
   7.293                    XEN_GUEST_HANDLE(void) u_domctl)
   7.294  {
   7.295 +    int rc;
   7.296 +
   7.297 +    if ( unlikely(d == current->domain) )
   7.298 +    {
   7.299 +        gdprintk(XENLOG_INFO, "Dom %u tried to do a paging op on itself.\n",
   7.300 +                 d->domain_id);
   7.301 +        return -EINVAL;
   7.302 +    }
   7.303 +    
   7.304 +    if ( unlikely(d->is_dying) )
   7.305 +    {
   7.306 +        gdprintk(XENLOG_INFO, "Ignoring paging op on dying domain %u\n",
   7.307 +                 d->domain_id);
   7.308 +        return 0;
   7.309 +    }
   7.310 +
   7.311 +    if ( unlikely(d->vcpu[0] == NULL) )
   7.312 +    {
   7.313 +        PAGING_ERROR("Paging op on a domain (%u) with no vcpus\n",
   7.314 +                     d->domain_id);
   7.315 +        return -EINVAL;
   7.316 +    }
   7.317 +    
   7.318 +    /* Code to handle log-dirty. Note that some log dirty operations
   7.319 +     * piggy-back on shadow operations. For example, when 
   7.320 +     * XEN_DOMCTL_SHADOW_OP_OFF is called, it first checks whether log dirty
    7.321 +     * mode is enabled. If it is, we disable log dirty and continue with the
    7.322 +     * shadow code. For this reason, we need to further dispatch the domctl
   7.323 +     * to next-level paging code (shadow or hap).
   7.324 +     */
   7.325 +    switch ( sc->op )
   7.326 +    {
   7.327 +    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
   7.328 +        return paging_log_dirty_enable(d);	
   7.329 +	
   7.330 +    case XEN_DOMCTL_SHADOW_OP_ENABLE:	
   7.331 +        if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
   7.332 +            return paging_log_dirty_enable(d);
   7.333 +
   7.334 +    case XEN_DOMCTL_SHADOW_OP_OFF:
   7.335 +        if ( paging_mode_log_dirty(d) )
   7.336 +            if ( (rc = paging_log_dirty_disable(d)) != 0 ) 
   7.337 +                return rc;
   7.338 +
   7.339 +    case XEN_DOMCTL_SHADOW_OP_CLEAN:
   7.340 +    case XEN_DOMCTL_SHADOW_OP_PEEK:
   7.341 +	return paging_log_dirty_op(d, sc);
   7.342 +    }
   7.343 +	
   7.344      /* Here, dispatch domctl to the appropriate paging code */
   7.345      if ( opt_hap_enabled && is_hvm_domain(d) )
   7.346 -        return hap_domctl(d, sc, u_domctl);
   7.347 +	return hap_domctl(d, sc, u_domctl);
   7.348      else
   7.349 -        return shadow_domctl(d, sc, u_domctl);
   7.350 +	return shadow_domctl(d, sc, u_domctl);
   7.351  }
   7.352  
   7.353  /* Call when destroying a domain */
   7.354  void paging_teardown(struct domain *d)
   7.355  {
   7.356 +    /* clean up log dirty resources. */
   7.357 +    paging_log_dirty_teardown(d);
   7.358 +    
   7.359      if ( opt_hap_enabled && is_hvm_domain(d) )
   7.360          hap_teardown(d);
   7.361      else
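
Two bits of arithmetic in the new common code are worth spelling out: paging_alloc_log_dirty_bitmap() rounds the bitmap size up to a whole number of longs, and paging_log_dirty_op() copies/clears the bitmap in CHUNK-bit (1kB) pieces. A standalone sketch of both (not Xen code), using an arbitrary example value in place of domain_get_maximum_gpfn():

/*
 * Standalone sketch (not Xen code) of the bitmap arithmetic used by
 * paging_alloc_log_dirty_bitmap() and the CHUNK loop in paging_log_dirty_op().
 * The example gpfn value is arbitrary.
 */
#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))
#define CHUNK (8 * 1024)   /* page-bits per iteration, i.e. 1kB of bitmap */

int main(void)
{
    unsigned long max_gpfn = 20000;    /* pretend domain_get_maximum_gpfn(d) */
    unsigned long pages;               /* sc->pages in the hypercall */

    /* Round up to a whole number of longs, as the allocation code does. */
    unsigned long bitmap_size =
        (max_gpfn + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
    printf("bitmap_size = %lu bits (%lu longs)\n",
           bitmap_size, (unsigned long)(bitmap_size / BITS_PER_LONG));

    /* Walk the bitmap the way paging_log_dirty_op() does. */
    pages = bitmap_size;
    for (unsigned long i = 0; i < pages; i += CHUNK) {
        unsigned long bytes =
            (((pages - i) > CHUNK ? CHUNK : (pages - i)) + 7) / 8;
        printf("copy/clear %lu bytes at byte offset %lu\n", bytes, i / 8);
    }
    return 0;
}
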
     8.1 --- a/xen/arch/x86/mm/shadow/common.c	Mon Jun 11 11:37:10 2007 +0100
     8.2 +++ b/xen/arch/x86/mm/shadow/common.c	Mon Jun 11 14:35:52 2007 +0100
     8.3 @@ -87,8 +87,6 @@ static int __init shadow_audit_key_init(
     8.4  __initcall(shadow_audit_key_init);
     8.5  #endif /* SHADOW_AUDIT */
     8.6  
     8.7 -static void sh_free_log_dirty_bitmap(struct domain *d);
     8.8 -
     8.9  int _shadow_mode_refcounts(struct domain *d)
    8.10  {
    8.11      return shadow_mode_refcounts(d);
    8.12 @@ -541,7 +539,7 @@ sh_validate_guest_entry(struct vcpu *v, 
    8.13      int result = 0;
    8.14      struct page_info *page = mfn_to_page(gmfn);
    8.15  
    8.16 -    sh_mark_dirty(v->domain, gmfn);
    8.17 +    paging_mark_dirty(v->domain, mfn_x(gmfn));
    8.18      
    8.19      // Determine which types of shadows are affected, and update each.
    8.20      //
    8.21 @@ -2455,6 +2453,10 @@ int shadow_enable(struct domain *d, u32 
    8.22          }        
    8.23      }
    8.24  
    8.25 +    /* initialize log dirty here */
    8.26 +    paging_log_dirty_init(d, shadow_enable_log_dirty, 
    8.27 +                          shadow_disable_log_dirty, shadow_clean_dirty_bitmap);
    8.28 +
    8.29      /* Init the P2M table.  Must be done before we take the shadow lock 
    8.30       * to avoid possible deadlock. */
    8.31      if ( mode & PG_translate )
    8.32 @@ -2464,6 +2466,7 @@ int shadow_enable(struct domain *d, u32 
    8.33              goto out_unlocked;
    8.34      }
    8.35  
    8.36 +
    8.37      shadow_lock(d);
    8.38  
    8.39      /* Sanity check again with the lock held */
    8.40 @@ -2564,8 +2567,6 @@ void shadow_teardown(struct domain *d)
    8.41          /* Release the hash table back to xenheap */
    8.42          if (d->arch.paging.shadow.hash_table) 
    8.43              shadow_hash_teardown(d);
    8.44 -        /* Release the log-dirty bitmap of dirtied pages */
    8.45 -        sh_free_log_dirty_bitmap(d);
    8.46          /* Should not have any more memory held */
    8.47          SHADOW_PRINTK("teardown done."
    8.48                         "  Shadow pages total = %u, free = %u, p2m=%u\n",
    8.49 @@ -2724,98 +2725,6 @@ static int shadow_test_disable(struct do
    8.50      return ret;
    8.51  }
    8.52  
    8.53 -static int
    8.54 -sh_alloc_log_dirty_bitmap(struct domain *d)
    8.55 -{
    8.56 -    ASSERT(d->arch.paging.shadow.dirty_bitmap == NULL);
    8.57 -    d->arch.paging.shadow.dirty_bitmap_size =
    8.58 -        (domain_get_maximum_gpfn(d) + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
    8.59 -    d->arch.paging.shadow.dirty_bitmap =
    8.60 -        xmalloc_array(unsigned long,
    8.61 -                      d->arch.paging.shadow.dirty_bitmap_size / BITS_PER_LONG);
    8.62 -    if ( d->arch.paging.shadow.dirty_bitmap == NULL )
    8.63 -    {
    8.64 -        d->arch.paging.shadow.dirty_bitmap_size = 0;
    8.65 -        return -ENOMEM;
    8.66 -    }
    8.67 -    memset(d->arch.paging.shadow.dirty_bitmap, 0,
    8.68 -           d->arch.paging.shadow.dirty_bitmap_size/8);
    8.69 -
    8.70 -    return 0;
    8.71 -}
    8.72 -
    8.73 -static void
    8.74 -sh_free_log_dirty_bitmap(struct domain *d)
    8.75 -{
    8.76 -    d->arch.paging.shadow.dirty_bitmap_size = 0;
    8.77 -    if ( d->arch.paging.shadow.dirty_bitmap )
    8.78 -    {
    8.79 -        xfree(d->arch.paging.shadow.dirty_bitmap);
    8.80 -        d->arch.paging.shadow.dirty_bitmap = NULL;
    8.81 -    }
    8.82 -}
    8.83 -
    8.84 -static int shadow_log_dirty_enable(struct domain *d)
    8.85 -{
    8.86 -    int ret;
    8.87 -
    8.88 -    domain_pause(d);
    8.89 -    shadow_lock(d);
    8.90 -
    8.91 -    if ( shadow_mode_log_dirty(d) )
    8.92 -    {
    8.93 -        ret = -EINVAL;
    8.94 -        goto out;
    8.95 -    }
    8.96 -
    8.97 -    if ( shadow_mode_enabled(d) )
    8.98 -    {
    8.99 -        /* This domain already has some shadows: need to clear them out 
   8.100 -         * of the way to make sure that all references to guest memory are 
   8.101 -         * properly write-protected */
   8.102 -        shadow_blow_tables(d);
   8.103 -    }
   8.104 -
   8.105 -#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
   8.106 -    /* 32bit PV guests on 64bit xen behave like older 64bit linux: they
   8.107 -     * change an l4e instead of cr3 to switch tables.  Give them the
   8.108 -     * same optimization */
   8.109 -    if ( is_pv_32on64_domain(d) )
   8.110 -        d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
   8.111 -#endif
   8.112 -
   8.113 -    ret = sh_alloc_log_dirty_bitmap(d);
   8.114 -    if ( ret != 0 )
   8.115 -    {
   8.116 -        sh_free_log_dirty_bitmap(d);
   8.117 -        goto out;
   8.118 -    }
   8.119 -
   8.120 -    ret = shadow_one_bit_enable(d, PG_log_dirty);
   8.121 -    if ( ret != 0 )
   8.122 -        sh_free_log_dirty_bitmap(d);
   8.123 -
   8.124 - out:
   8.125 -    shadow_unlock(d);
   8.126 -    domain_unpause(d);
   8.127 -    return ret;
   8.128 -}
   8.129 -
   8.130 -static int shadow_log_dirty_disable(struct domain *d)
   8.131 -{
   8.132 -    int ret;
   8.133 -
   8.134 -    domain_pause(d);
   8.135 -    shadow_lock(d);
   8.136 -    ret = shadow_one_bit_disable(d, PG_log_dirty);
   8.137 -    if ( !shadow_mode_log_dirty(d) )
   8.138 -        sh_free_log_dirty_bitmap(d);
   8.139 -    shadow_unlock(d);
   8.140 -    domain_unpause(d);
   8.141 -
   8.142 -    return ret;
   8.143 -}
   8.144 -
   8.145  /**************************************************************************/
   8.146  /* P2M map manipulations */
   8.147  
   8.148 @@ -2892,150 +2801,62 @@ void shadow_convert_to_log_dirty(struct 
   8.149      BUG();
   8.150  }
   8.151  
   8.152 -
   8.153 -/* Read a domain's log-dirty bitmap and stats.  
   8.154 - * If the operation is a CLEAN, clear the bitmap and stats as well. */
   8.155 -static int shadow_log_dirty_op(
   8.156 -    struct domain *d, struct xen_domctl_shadow_op *sc)
   8.157 +/* Shadow specific code which is called in paging_log_dirty_enable().
    8.158 + * Return 0 if no problem is found.
   8.159 + */
   8.160 +int shadow_enable_log_dirty(struct domain *d)
   8.161  {
   8.162 -    int i, rv = 0, clean = 0, peek = 1;
   8.163 -
   8.164 -    domain_pause(d);
   8.165 +    int ret;
   8.166 +
   8.167 +    /* shadow lock is required here */
   8.168      shadow_lock(d);
   8.169 -
   8.170 -    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
   8.171 -
   8.172 -    SHADOW_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n", 
   8.173 -                  (clean) ? "clean" : "peek",
   8.174 -                  d->domain_id,
   8.175 -                  d->arch.paging.shadow.fault_count, 
   8.176 -                  d->arch.paging.shadow.dirty_count);
   8.177 -
   8.178 -    sc->stats.fault_count = d->arch.paging.shadow.fault_count;
   8.179 -    sc->stats.dirty_count = d->arch.paging.shadow.dirty_count;
   8.180 -
   8.181 -    if ( clean )
   8.182 +    if ( shadow_mode_enabled(d) )
   8.183      {
   8.184 -        /* Need to revoke write access to the domain's pages again.
   8.185 -         * In future, we'll have a less heavy-handed approach to this,
   8.186 -         * but for now, we just unshadow everything except Xen. */
   8.187 +        /* This domain already has some shadows: need to clear them out 
   8.188 +         * of the way to make sure that all references to guest memory are 
   8.189 +         * properly write-protected */
   8.190          shadow_blow_tables(d);
   8.191 -
   8.192 -        d->arch.paging.shadow.fault_count = 0;
   8.193 -        d->arch.paging.shadow.dirty_count = 0;
   8.194      }
   8.195  
   8.196 -    if ( guest_handle_is_null(sc->dirty_bitmap) )
   8.197 -        /* caller may have wanted just to clean the state or access stats. */
   8.198 -        peek = 0;
   8.199 -
   8.200 -    if ( (peek || clean) && (d->arch.paging.shadow.dirty_bitmap == NULL) )
   8.201 -    {
   8.202 -        rv = -EINVAL; /* perhaps should be ENOMEM? */
   8.203 -        goto out;
   8.204 -    }
   8.205 - 
   8.206 -    if ( sc->pages > d->arch.paging.shadow.dirty_bitmap_size )
   8.207 -        sc->pages = d->arch.paging.shadow.dirty_bitmap_size;
   8.208 -
   8.209 -#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
   8.210 -    for ( i = 0; i < sc->pages; i += CHUNK )
   8.211 -    {
   8.212 -        int bytes = ((((sc->pages - i) > CHUNK)
   8.213 -                      ? CHUNK
   8.214 -                      : (sc->pages - i)) + 7) / 8;
   8.215 -
   8.216 -        if ( likely(peek) )
   8.217 -        {
   8.218 -            if ( copy_to_guest_offset(
   8.219 -                sc->dirty_bitmap, i/8,
   8.220 -                (uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), bytes) )
   8.221 -            {
   8.222 -                rv = -EFAULT;
   8.223 -                goto out;
   8.224 -            }
   8.225 -        }
   8.226 -
   8.227 -        if ( clean )
   8.228 -            memset((uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), 0, bytes);
   8.229 -    }
   8.230 -#undef CHUNK
   8.231 -
   8.232 - out:
   8.233 +#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
   8.234 +    /* 32bit PV guests on 64bit xen behave like older 64bit linux: they
   8.235 +     * change an l4e instead of cr3 to switch tables.  Give them the
   8.236 +     * same optimization */
   8.237 +    if ( is_pv_32on64_domain(d) )
   8.238 +        d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
   8.239 +#endif
   8.240 +    
   8.241 +    ret = shadow_one_bit_enable(d, PG_log_dirty);
   8.242      shadow_unlock(d);
   8.243 -    domain_unpause(d);
   8.244 -    return rv;
   8.245 +
   8.246 +    return ret;
   8.247  }
   8.248  
   8.249 -
   8.250 -/* Mark a page as dirty */
   8.251 -void sh_mark_dirty(struct domain *d, mfn_t gmfn)
    8.252 +/* shadow specific code which is called in paging_log_dirty_disable() */
   8.253 +int shadow_disable_log_dirty(struct domain *d)
   8.254  {
   8.255 -    unsigned long pfn;
   8.256 -    int do_locking;
   8.257 -
   8.258 -    if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) )
   8.259 -        return;
   8.260 -
   8.261 -    /* Although this is an externally visible function, we do not know
   8.262 -     * whether the shadow lock will be held when it is called (since it
   8.263 -     * can be called from __hvm_copy during emulation).
   8.264 -     * If the lock isn't held, take it for the duration of the call. */
   8.265 -    do_locking = !shadow_locked_by_me(d);
   8.266 -    if ( do_locking ) 
   8.267 -    { 
   8.268 -        shadow_lock(d);
   8.269 -        /* Check the mode again with the lock held */ 
   8.270 -        if ( unlikely(!shadow_mode_log_dirty(d)) )
   8.271 -        {
   8.272 -            shadow_unlock(d);
   8.273 -            return;
   8.274 -        }
   8.275 -    }
   8.276 -
   8.277 -    ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL);
   8.278 -
   8.279 -    /* We /really/ mean PFN here, even for non-translated guests. */
   8.280 -    pfn = get_gpfn_from_mfn(mfn_x(gmfn));
   8.281 -
   8.282 -    /*
   8.283 -     * Values with the MSB set denote MFNs that aren't really part of the 
   8.284 -     * domain's pseudo-physical memory map (e.g., the shared info frame).
   8.285 -     * Nothing to do here...
   8.286 -     */
   8.287 -    if ( unlikely(!VALID_M2P(pfn)) )
   8.288 -        return;
   8.289 -
   8.290 -    /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
   8.291 -    if ( likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) ) 
   8.292 -    { 
   8.293 -        if ( !__test_and_set_bit(pfn, d->arch.paging.shadow.dirty_bitmap) )
   8.294 -        {
   8.295 -            SHADOW_DEBUG(LOGDIRTY, 
   8.296 -                          "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
   8.297 -                          mfn_x(gmfn), pfn, d->domain_id);
   8.298 -            d->arch.paging.shadow.dirty_count++;
   8.299 -        }
   8.300 -    }
   8.301 -    else
   8.302 -    {
   8.303 -        SHADOW_PRINTK("mark_dirty OOR! "
   8.304 -                       "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
   8.305 -                       "owner=%d c=%08x t=%" PRtype_info "\n",
   8.306 -                       mfn_x(gmfn), 
   8.307 -                       pfn, 
   8.308 -                       d->arch.paging.shadow.dirty_bitmap_size,
   8.309 -                       d->domain_id,
   8.310 -                       (page_get_owner(mfn_to_page(gmfn))
   8.311 -                        ? page_get_owner(mfn_to_page(gmfn))->domain_id
   8.312 -                        : -1),
   8.313 -                       mfn_to_page(gmfn)->count_info, 
   8.314 -                       mfn_to_page(gmfn)->u.inuse.type_info);
   8.315 -    }
   8.316 -
   8.317 -    if ( do_locking ) shadow_unlock(d);
   8.318 +    int ret;
   8.319 +
   8.320 +    /* shadow lock is required here */    
   8.321 +    shadow_lock(d);
   8.322 +    ret = shadow_one_bit_disable(d, PG_log_dirty);
   8.323 +    shadow_unlock(d);
   8.324 +    
   8.325 +    return ret;
   8.326  }
   8.327  
    8.328 +/* This function is called when we CLEAN the log dirty bitmap. See
   8.329 + * paging_log_dirty_op() for details. 
   8.330 + */
   8.331 +void shadow_clean_dirty_bitmap(struct domain *d)
   8.332 +{
   8.333 +    shadow_lock(d);
   8.334 +    /* Need to revoke write access to the domain's pages again.
   8.335 +     * In future, we'll have a less heavy-handed approach to this,
   8.336 +     * but for now, we just unshadow everything except Xen. */
   8.337 +    shadow_blow_tables(d);
   8.338 +    shadow_unlock(d);
   8.339 +}
   8.340  /**************************************************************************/
   8.341  /* Shadow-control XEN_DOMCTL dispatcher */
   8.342  
   8.343 @@ -3045,33 +2866,9 @@ int shadow_domctl(struct domain *d,
   8.344  {
   8.345      int rc, preempted = 0;
   8.346  
   8.347 -    if ( unlikely(d == current->domain) )
   8.348 -    {
   8.349 -        gdprintk(XENLOG_INFO, "Dom %u tried to do a shadow op on itself.\n",
   8.350 -                 d->domain_id);
   8.351 -        return -EINVAL;
   8.352 -    }
   8.353 -
   8.354 -    if ( unlikely(d->is_dying) )
   8.355 -    {
   8.356 -        gdprintk(XENLOG_INFO, "Ignoring shadow op on dying domain %u\n",
   8.357 -                 d->domain_id);
   8.358 -        return 0;
   8.359 -    }
   8.360 -
   8.361 -    if ( unlikely(d->vcpu[0] == NULL) )
   8.362 -    {
   8.363 -        SHADOW_ERROR("Shadow op on a domain (%u) with no vcpus\n",
   8.364 -                     d->domain_id);
   8.365 -        return -EINVAL;
   8.366 -    }
   8.367 -
   8.368      switch ( sc->op )
   8.369      {
   8.370      case XEN_DOMCTL_SHADOW_OP_OFF:
   8.371 -        if ( shadow_mode_log_dirty(d) )
   8.372 -            if ( (rc = shadow_log_dirty_disable(d)) != 0 ) 
   8.373 -                return rc;
   8.374          if ( d->arch.paging.mode == PG_SH_enable )
   8.375              if ( (rc = shadow_test_disable(d)) != 0 ) 
   8.376                  return rc;
   8.377 @@ -3080,19 +2877,10 @@ int shadow_domctl(struct domain *d,
   8.378      case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
   8.379          return shadow_test_enable(d);
   8.380  
   8.381 -    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
   8.382 -        return shadow_log_dirty_enable(d);
   8.383 -
   8.384      case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
   8.385          return shadow_enable(d, PG_refcounts|PG_translate);
   8.386  
   8.387 -    case XEN_DOMCTL_SHADOW_OP_CLEAN:
   8.388 -    case XEN_DOMCTL_SHADOW_OP_PEEK:
   8.389 -        return shadow_log_dirty_op(d, sc);
   8.390 -
   8.391      case XEN_DOMCTL_SHADOW_OP_ENABLE:
   8.392 -        if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
   8.393 -            return shadow_log_dirty_enable(d);
   8.394          return shadow_enable(d, sc->mode << PG_mode_shift);
   8.395  
   8.396      case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
     9.1 --- a/xen/arch/x86/mm/shadow/multi.c	Mon Jun 11 11:37:10 2007 +0100
     9.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Mon Jun 11 14:35:52 2007 +0100
     9.3 @@ -457,7 +457,7 @@ static u32 guest_set_ad_bits(struct vcpu
     9.4      }
     9.5  
     9.6      /* Set the bit(s) */
     9.7 -    sh_mark_dirty(v->domain, gmfn);
     9.8 +    paging_mark_dirty(v->domain, mfn_x(gmfn));
     9.9      SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
    9.10                   "old flags = %#x, new flags = %#x\n", 
    9.11                   gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep), 
    9.12 @@ -717,7 +717,7 @@ static always_inline void
    9.13      if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
    9.14      {
    9.15          if ( ft & FETCH_TYPE_WRITE ) 
    9.16 -            sh_mark_dirty(d, target_mfn);
    9.17 +            paging_mark_dirty(d, mfn_x(target_mfn));
    9.18          else if ( !sh_mfn_is_dirty(d, target_mfn) )
    9.19              sflags &= ~_PAGE_RW;
    9.20      }
    9.21 @@ -2856,7 +2856,7 @@ static int sh_page_fault(struct vcpu *v,
    9.22      }
    9.23  
    9.24      perfc_incr(shadow_fault_fixed);
    9.25 -    d->arch.paging.shadow.fault_count++;
    9.26 +    d->arch.paging.log_dirty.fault_count++;
    9.27      reset_early_unshadow(v);
    9.28  
    9.29   done:
    9.30 @@ -4058,7 +4058,7 @@ sh_x86_emulate_write(struct vcpu *v, uns
    9.31      else
    9.32          reset_early_unshadow(v);
    9.33      
    9.34 -    sh_mark_dirty(v->domain, mfn);
    9.35 +    paging_mark_dirty(v->domain, mfn_x(mfn));
    9.36  
    9.37      sh_unmap_domain_page(addr);
    9.38      shadow_audit_tables(v);
    9.39 @@ -4114,7 +4114,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
    9.40      else
    9.41          reset_early_unshadow(v);
    9.42  
    9.43 -    sh_mark_dirty(v->domain, mfn);
    9.44 +    paging_mark_dirty(v->domain, mfn_x(mfn));
    9.45  
    9.46      sh_unmap_domain_page(addr);
    9.47      shadow_audit_tables(v);
    9.48 @@ -4158,7 +4158,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v,
    9.49      else
    9.50          reset_early_unshadow(v);
    9.51  
    9.52 -    sh_mark_dirty(v->domain, mfn);
    9.53 +    paging_mark_dirty(v->domain, mfn_x(mfn));
    9.54  
    9.55      sh_unmap_domain_page(addr);
    9.56      shadow_audit_tables(v);
    10.1 --- a/xen/arch/x86/mm/shadow/private.h	Mon Jun 11 11:37:10 2007 +0100
    10.2 +++ b/xen/arch/x86/mm/shadow/private.h	Mon Jun 11 14:35:52 2007 +0100
    10.3 @@ -496,13 +496,13 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 
    10.4  {
    10.5      unsigned long pfn;
    10.6      ASSERT(shadow_mode_log_dirty(d));
    10.7 -    ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL);
    10.8 +    ASSERT(d->arch.paging.log_dirty.bitmap != NULL);
    10.9  
   10.10      /* We /really/ mean PFN here, even for non-translated guests. */
   10.11      pfn = get_gpfn_from_mfn(mfn_x(gmfn));
   10.12      if ( likely(VALID_M2P(pfn))
   10.13 -         && likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) 
   10.14 -         && test_bit(pfn, d->arch.paging.shadow.dirty_bitmap) )
   10.15 +         && likely(pfn < d->arch.paging.log_dirty.bitmap_size) 
   10.16 +         && test_bit(pfn, d->arch.paging.log_dirty.bitmap) )
   10.17          return 1;
   10.18  
   10.19      return 0;
    11.1 --- a/xen/include/asm-x86/domain.h	Mon Jun 11 11:37:10 2007 +0100
    11.2 +++ b/xen/include/asm-x86/domain.h	Mon Jun 11 14:35:52 2007 +0100
    11.3 @@ -92,14 +92,6 @@ struct shadow_domain {
    11.4  
    11.5      /* Fast MMIO path heuristic */
    11.6      int has_fast_mmio_entries;
    11.7 -
    11.8 -    /* Shadow log-dirty bitmap */
    11.9 -    unsigned long *dirty_bitmap;
   11.10 -    unsigned int dirty_bitmap_size;  /* in pages, bit per page */
   11.11 -
   11.12 -    /* Shadow log-dirty mode stats */
   11.13 -    unsigned int fault_count;
   11.14 -    unsigned int dirty_count;
   11.15  };
   11.16  
   11.17  struct shadow_vcpu {
   11.18 @@ -134,7 +126,6 @@ struct hap_domain {
   11.19  /************************************************/
   11.20  /*       p2m handling                           */
   11.21  /************************************************/
   11.22 -
   11.23  struct p2m_domain {
   11.24      /* Lock that protects updates to the p2m */
   11.25      spinlock_t         lock;
   11.26 @@ -156,16 +147,36 @@ struct p2m_domain {
   11.27  /************************************************/
   11.28  /*       common paging data structure           */
   11.29  /************************************************/
   11.30 -struct paging_domain {
   11.31 -    u32               mode;  /* flags to control paging operation */
   11.32 +struct log_dirty_domain {
   11.33 +    /* log-dirty lock */
   11.34 +    spinlock_t     lock;
   11.35 +    int            locker; /* processor that holds the lock */
   11.36 +    const char    *locker_function; /* func that took it */
   11.37  
   11.38 -    /* extension for shadow paging support */
   11.39 -    struct shadow_domain shadow;
   11.40 +    /* log-dirty bitmap to record dirty pages */
   11.41 +    unsigned long *bitmap;
   11.42 +    unsigned int   bitmap_size;  /* in pages, bit per page */
   11.43  
   11.44 -    /* Other paging assistance code will have structs here */
   11.45 -    struct hap_domain    hap;
   11.46 +    /* log-dirty mode stats */
   11.47 +    unsigned int   fault_count;
   11.48 +    unsigned int   dirty_count;
   11.49 +
   11.50 +    /* functions which are paging mode specific */
   11.51 +    int            (*enable_log_dirty   )(struct domain *d);
   11.52 +    int            (*disable_log_dirty  )(struct domain *d);
   11.53 +    void           (*clean_dirty_bitmap )(struct domain *d);
   11.54  };
   11.55  
   11.56 +struct paging_domain {
   11.57 +    /* flags to control paging operation */
   11.58 +    u32                     mode;
   11.59 +    /* extension for shadow paging support */
   11.60 +    struct shadow_domain    shadow;
    11.61 +    /* extension for hardware-assisted paging */
   11.62 +    struct hap_domain       hap;
   11.63 +    /* log dirty support */
   11.64 +    struct log_dirty_domain log_dirty;
   11.65 +};
   11.66  struct paging_vcpu {
   11.67      /* Pointers to mode-specific entry points. */
   11.68      struct paging_mode *mode;
    12.1 --- a/xen/include/asm-x86/grant_table.h	Mon Jun 11 11:37:10 2007 +0100
    12.2 +++ b/xen/include/asm-x86/grant_table.h	Mon Jun 11 14:35:52 2007 +0100
    12.3 @@ -31,7 +31,7 @@ int replace_grant_host_mapping(
    12.4  #define gnttab_shared_gmfn(d, t, i)                     \
    12.5      (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i)))
    12.6  
    12.7 -#define gnttab_mark_dirty(d, f) mark_dirty((d), (f))
    12.8 +#define gnttab_mark_dirty(d, f) paging_mark_dirty((d), (f))
    12.9  
   12.10  static inline void gnttab_clear_flag(unsigned long nr, uint16_t *addr)
   12.11  {
    13.1 --- a/xen/include/asm-x86/p2m.h	Mon Jun 11 11:37:10 2007 +0100
    13.2 +++ b/xen/include/asm-x86/p2m.h	Mon Jun 11 14:35:52 2007 +0100
    13.3 @@ -129,6 +129,11 @@ void guest_physmap_add_page(struct domai
    13.4  void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
    13.5                                 unsigned long mfn);
    13.6  
    13.7 +/* set P2M table l1e flags */
    13.8 +void p2m_set_flags_global(struct domain *d, u32 l1e_flags);
    13.9 +
   13.10 +/* set P2M table l1e flags for a gpa */
   13.11 +int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags);
   13.12  
   13.13  #endif /* _XEN_P2M_H */
   13.14  
    14.1 --- a/xen/include/asm-x86/paging.h	Mon Jun 11 11:37:10 2007 +0100
    14.2 +++ b/xen/include/asm-x86/paging.h	Mon Jun 11 14:35:52 2007 +0100
    14.3 @@ -63,6 +63,9 @@
    14.4  #define paging_mode_translate(_d) ((_d)->arch.paging.mode & PG_translate)
    14.5  #define paging_mode_external(_d)  ((_d)->arch.paging.mode & PG_external)
    14.6  
    14.7 +/* flags used for paging debug */
    14.8 +#define PAGING_DEBUG_LOGDIRTY 0
    14.9 +
   14.10  /******************************************************************************
   14.11   * The equivalent for a particular vcpu of a shadowed domain. */
   14.12  
   14.13 @@ -136,6 +139,29 @@ struct paging_mode {
   14.14      struct shadow_paging_mode shadow;
   14.15  };
   14.16  
   14.17 +/*****************************************************************************
   14.18 + * Log dirty code */
   14.19 +
   14.20 +/* allocate log dirty bitmap resource for recording dirty pages */
   14.21 +int paging_alloc_log_dirty_bitmap(struct domain *d);
   14.22 +
   14.23 +/* free log dirty bitmap resource */
   14.24 +void paging_free_log_dirty_bitmap(struct domain *d);
   14.25 +
   14.26 +/* enable log dirty */
   14.27 +int paging_log_dirty_enable(struct domain *d);
   14.28 +
   14.29 +/* disable log dirty */
   14.30 +int paging_log_dirty_disable(struct domain *d);
   14.31 +
   14.32 +/* log dirty initialization */
   14.33 +void paging_log_dirty_init(struct domain *d,
   14.34 +                           int  (*enable_log_dirty)(struct domain *d),
   14.35 +                           int  (*disable_log_dirty)(struct domain *d),
   14.36 +                           void (*clean_dirty_bitmap)(struct domain *d));
   14.37 +
   14.38 +/* mark a page as dirty */
   14.39 +void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
   14.40  
   14.41  /*****************************************************************************
   14.42   * Entry points into the paging-assistance code */
    15.1 --- a/xen/include/asm-x86/shadow.h	Mon Jun 11 11:37:10 2007 +0100
    15.2 +++ b/xen/include/asm-x86/shadow.h	Mon Jun 11 14:35:52 2007 +0100
    15.3 @@ -75,16 +75,14 @@ void shadow_teardown(struct domain *d);
    15.4  /* Call once all of the references to the domain have gone away */
    15.5  void shadow_final_teardown(struct domain *d);
    15.6  
    15.7 -/* Mark a page as dirty in the log-dirty bitmap: called when Xen 
    15.8 - * makes changes to guest memory on its behalf. */
    15.9 -void sh_mark_dirty(struct domain *d, mfn_t gmfn);
   15.10 -/* Cleaner version so we don't pepper shadow_mode tests all over the place */
   15.11 -static inline void mark_dirty(struct domain *d, unsigned long gmfn)
   15.12 -{
   15.13 -    if ( unlikely(shadow_mode_log_dirty(d)) )
   15.14 -        /* See the comment about locking in sh_mark_dirty */
   15.15 -        sh_mark_dirty(d, _mfn(gmfn));
   15.16 -}
   15.17 +/* shadow code to call when log dirty is enabled */
   15.18 +int shadow_enable_log_dirty(struct domain *d);
   15.19 +
   15.20 +/* shadow code to call when log dirty is disabled */
   15.21 +int shadow_disable_log_dirty(struct domain *d);
   15.22 +
   15.23 +/* shadow code to call when bitmap is being cleaned */
   15.24 +void shadow_clean_dirty_bitmap(struct domain *d);
   15.25  
   15.26  /* Update all the things that are derived from the guest's CR0/CR3/CR4.
   15.27   * Called to initialize paging structures if the paging mode