ia64/xen-unstable

changeset 17691:ff23c9a11085

Intel EPT: Add page shattering logic for EPT when a super-page gets partially freed.

Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed May 21 11:02:51 2008 +0100 (2008-05-21)
parents ef019d230080
children 70ca37d22895
files xen/arch/x86/mm/hap/p2m-ept.c
line diff
     1.1 --- a/xen/arch/x86/mm/hap/p2m-ept.c	Wed May 21 10:59:49 2008 +0100
     1.2 +++ b/xen/arch/x86/mm/hap/p2m-ept.c	Wed May 21 11:02:51 2008 +0100
     1.3 @@ -49,10 +49,35 @@ static void ept_p2m_type_to_flags(ept_en
     1.4  
     1.5  #define GUEST_TABLE_NORMAL_PAGE 1
     1.6  #define GUEST_TABLE_SUPER_PAGE  2
     1.7 +#define GUEST_TABLE_SPLIT_PAGE  3
     1.8 +
     1.9 +static int ept_set_middle_entry(struct domain *d, ept_entry_t *ept_entry)
    1.10 +{
    1.11 +    struct page_info *pg;
    1.12 +
    1.13 +    pg = d->arch.p2m->alloc_page(d);
    1.14 +    if ( pg == NULL )
    1.15 +        return 0;
    1.16 +
    1.17 +    pg->count_info = 1;
    1.18 +    pg->u.inuse.type_info = 1 | PGT_validated;
    1.19 +    list_add_tail(&pg->list, &d->arch.p2m->pages);
    1.20 +
    1.21 +    ept_entry->emt = 0;
    1.22 +    ept_entry->sp_avail = 0;
    1.23 +    ept_entry->avail1 = 0;
    1.24 +    ept_entry->mfn = page_to_mfn(pg);
    1.25 +    ept_entry->rsvd = 0;
    1.26 +    ept_entry->avail2 = 0;
    1.27 +    /* last step */
    1.28 +    ept_entry->r = ept_entry->w = ept_entry->x = 1;
    1.29 +
    1.30 +    return 1;
    1.31 +}
    1.32  
    1.33  static int ept_next_level(struct domain *d, bool_t read_only,
    1.34                            ept_entry_t **table, unsigned long *gfn_remainder,
    1.35 -                          u32 shift)
    1.36 +                          u32 shift, int order)
    1.37  {
    1.38      ept_entry_t *ept_entry, *next;
    1.39      u32 index;
    1.40 @@ -63,27 +88,11 @@ static int ept_next_level(struct domain 
    1.41  
    1.42      if ( !(ept_entry->epte & 0x7) )
    1.43      {
    1.44 -        struct page_info *pg;
    1.45 -
    1.46          if ( read_only )
    1.47              return 0;
    1.48  
    1.49 -        pg = d->arch.p2m->alloc_page(d);
    1.50 -        if ( pg == NULL )
    1.51 +        if ( !ept_set_middle_entry(d, ept_entry) )
    1.52              return 0;
    1.53 -
    1.54 -        pg->count_info = 1;
    1.55 -        pg->u.inuse.type_info = 1 | PGT_validated;
    1.56 -        list_add_tail(&pg->list, &d->arch.p2m->pages);
    1.57 -
    1.58 -        ept_entry->emt = 0;
    1.59 -        ept_entry->sp_avail = 0;
    1.60 -        ept_entry->avail1 = 0;
    1.61 -        ept_entry->mfn = page_to_mfn(pg);
    1.62 -        ept_entry->rsvd = 0;
    1.63 -        ept_entry->avail2 = 0;
    1.64 -        /* last step */
    1.65 -        ept_entry->r = ept_entry->w = ept_entry->x = 1;
    1.66      }
    1.67  
    1.68      if ( !ept_entry->sp_avail )
    1.69 @@ -95,7 +104,12 @@ static int ept_next_level(struct domain 
    1.70          return GUEST_TABLE_NORMAL_PAGE;
    1.71      }
    1.72      else
    1.73 -        return GUEST_TABLE_SUPER_PAGE;
    1.74 +    {
    1.75 +        if ( order == shift || read_only )
    1.76 +            return GUEST_TABLE_SUPER_PAGE;
    1.77 +        else
    1.78 +            return GUEST_TABLE_SPLIT_PAGE;
    1.79 +    }
    1.80  }
    1.81  
    1.82  static int
    1.83 @@ -109,7 +123,9 @@ ept_set_entry(struct domain *d, unsigned
    1.84      int i, rv = 0, ret = 0;
    1.85      int walk_level = order / EPT_TABLE_ORDER;
    1.86  
    1.87 -    /* Should check if gfn obeys GAW here */
    1.88 +    /* we only support 4k and 2m pages now */
    1.89 +
    1.90 +    BUG_ON(order && order != EPT_TABLE_ORDER);
    1.91  
    1.92      if (  order != 0 )
    1.93          if ( (gfn & ((1UL << order) - 1)) )
    1.94 @@ -122,10 +138,10 @@ ept_set_entry(struct domain *d, unsigned
    1.95      for ( i = EPT_DEFAULT_GAW; i > walk_level; i-- )
    1.96      {
    1.97          ret = ept_next_level(d, 0, &table, &gfn_remainder,
    1.98 -          i * EPT_TABLE_ORDER);
    1.99 +          i * EPT_TABLE_ORDER, order);
   1.100          if ( !ret )
   1.101              goto out;
   1.102 -        else if ( ret == GUEST_TABLE_SUPER_PAGE )
   1.103 +        else if ( ret != GUEST_TABLE_NORMAL_PAGE )
   1.104              break;
   1.105      }
   1.106  
   1.107 @@ -135,35 +151,87 @@ ept_set_entry(struct domain *d, unsigned
   1.108  
   1.109      ept_entry = table + index;
   1.110  
   1.111 -    if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
   1.112 +    if ( ret != GUEST_TABLE_SPLIT_PAGE )
   1.113      {
   1.114 -        /* Track the highest gfn for which we have ever had a valid mapping */
   1.115 -        if ( gfn > d->arch.p2m->max_mapped_pfn )
   1.116 -            d->arch.p2m->max_mapped_pfn = gfn;
   1.117 +        if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
   1.118 +        {
   1.119 +            /* Track the highest gfn for which we have ever had a valid mapping */
   1.120 +            if ( gfn > d->arch.p2m->max_mapped_pfn )
   1.121 +                d->arch.p2m->max_mapped_pfn = gfn;
   1.122  
   1.123 -        ept_entry->emt = EPT_DEFAULT_MT;
   1.124 -        ept_entry->sp_avail = walk_level ? 1 : 0;
   1.125 +            ept_entry->emt = EPT_DEFAULT_MT;
   1.126 +            ept_entry->sp_avail = walk_level ? 1 : 0;
   1.127  
   1.128 -        if ( ret == GUEST_TABLE_SUPER_PAGE )
   1.129 -        {
   1.130 -            ept_entry->mfn = mfn_x(mfn) - offset;
   1.131 -            if ( ept_entry->avail1 == p2m_ram_logdirty &&
   1.132 -              p2mt == p2m_ram_rw )
   1.133 -                for ( i = 0; i < 512; i++ )
   1.134 -                    paging_mark_dirty(d, mfn_x(mfn)-offset+i);
   1.135 +            if ( ret == GUEST_TABLE_SUPER_PAGE )
   1.136 +            {
   1.137 +                ept_entry->mfn = mfn_x(mfn) - offset;
   1.138 +                if ( ept_entry->avail1 == p2m_ram_logdirty &&
   1.139 +                  p2mt == p2m_ram_rw )
   1.140 +                    for ( i = 0; i < 512; i++ )
   1.141 +                        paging_mark_dirty(d, mfn_x(mfn)-offset+i);
   1.142 +            }
   1.143 +            else
   1.144 +                ept_entry->mfn = mfn_x(mfn);
   1.145 +
   1.146 +            ept_entry->avail1 = p2mt;
   1.147 +            ept_entry->rsvd = 0;
   1.148 +            ept_entry->avail2 = 0;
   1.149 +            /* last step */
   1.150 +            ept_entry->r = ept_entry->w = ept_entry->x = 1;
   1.151 +            ept_p2m_type_to_flags(ept_entry, p2mt);
   1.152          }
   1.153          else
   1.154 -            ept_entry->mfn = mfn_x(mfn);
   1.155 -
   1.156 -        ept_entry->avail1 = p2mt;
   1.157 -        ept_entry->rsvd = 0;
   1.158 -        ept_entry->avail2 = 0;
   1.159 -        /* last step */
   1.160 -        ept_entry->r = ept_entry->w = ept_entry->x = 1;
   1.161 -        ept_p2m_type_to_flags(ept_entry, p2mt);
   1.162 +            ept_entry->epte = 0;
   1.163      }
   1.164      else
   1.165 -        ept_entry->epte = 0;
   1.166 +    {
    1.167 +        /* This entry was a super page, but only one of its 4k pages is
    1.168 +         * being set, so the 2m page must be split into 4k pages now.
   1.169 +         */
   1.170 +
   1.171 +        ept_entry_t *split_table = NULL;
   1.172 +        ept_entry_t *split_ept_entry = NULL;
   1.173 +        unsigned long split_mfn = ept_entry->mfn;
   1.174 +        p2m_type_t split_p2mt = ept_entry->avail1;
   1.175 +
    1.176 +        /* Allocate a new page for the EPT middle-level entry that takes
    1.177 +         * the place of the former leaf super-page entry.
   1.178 +         */
   1.179 +
   1.180 +        if ( !ept_set_middle_entry(d, ept_entry) )
   1.181 +            goto out;
   1.182 +
    1.183 +        /* Split the former super page into 4k pages. */
   1.184 +
   1.185 +        split_table = map_domain_page(ept_entry->mfn);
   1.186 +
   1.187 +        for ( i = 0; i < 512; i++ )
   1.188 +        {
   1.189 +            split_ept_entry = split_table + i;
   1.190 +            split_ept_entry->emt = EPT_DEFAULT_MT;
   1.191 +            split_ept_entry->sp_avail =  0;
   1.192 +
   1.193 +            split_ept_entry->mfn = split_mfn+i;
   1.194 +
   1.195 +            split_ept_entry->avail1 = split_p2mt;
   1.196 +            split_ept_entry->rsvd = 0;
   1.197 +            split_ept_entry->avail2 = 0;
   1.198 +            /* last step */
   1.199 +            split_ept_entry->r = split_ept_entry->w = split_ept_entry->x = 1;
   1.200 +            ept_p2m_type_to_flags(split_ept_entry, split_p2mt);
   1.201 +        }
   1.202 +
    1.203 +        /* Set the targeted 4k page to the requested mapping. */
   1.204 +
   1.205 +        offset = gfn & ((1 << EPT_TABLE_ORDER) - 1);
   1.206 +        split_ept_entry = split_table + offset;
   1.207 +        split_ept_entry->mfn = mfn_x(mfn);
   1.208 +        split_ept_entry->avail1 = p2mt;
   1.209 +        ept_p2m_type_to_flags(split_ept_entry, p2mt);
   1.210 +
   1.211 +        unmap_domain_page(split_table);
   1.212 +
   1.213 +    }
   1.214  
   1.215      /* Success */
   1.216      rv = 1;
   1.217 @@ -179,22 +247,22 @@ out:
   1.218      {
   1.219          if ( p2mt == p2m_ram_rw )
   1.220          {
   1.221 -            if ( ret == GUEST_TABLE_SUPER_PAGE )
   1.222 +            if ( order == EPT_TABLE_ORDER )
   1.223              {
   1.224                  for ( i = 0; i < 512; i++ )
   1.225                      iommu_map_page(d, gfn-offset+i, mfn_x(mfn)-offset+i);
   1.226              }
   1.227 -            else if ( ret )
   1.228 +            else if ( !order )
   1.229                  iommu_map_page(d, gfn, mfn_x(mfn));
   1.230          }
   1.231          else
   1.232          {
   1.233 -            if ( ret == GUEST_TABLE_SUPER_PAGE )
   1.234 +            if ( order == EPT_TABLE_ORDER )
   1.235              {
   1.236                  for ( i = 0; i < 512; i++ )
   1.237                      iommu_unmap_page(d, gfn-offset+i);
   1.238              }
   1.239 -            else if ( ret )
   1.240 +            else if ( !order )
   1.241                  iommu_unmap_page(d, gfn);
   1.242          }
   1.243      }
   1.244 @@ -230,7 +298,7 @@ static mfn_t ept_get_entry(struct domain
   1.245      for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
   1.246      {
   1.247          ret = ept_next_level(d, 1, &table, &gfn_remainder,
   1.248 -                             i * EPT_TABLE_ORDER);
   1.249 +                             i * EPT_TABLE_ORDER, 0);
   1.250          if ( !ret )
   1.251              goto out;
   1.252          else if ( ret == GUEST_TABLE_SUPER_PAGE )