ia64/xen-unstable

changeset 15782:b1c3b9df7d9a

[xen, xencomm] fix various xencomm invalid racy access.
- Xencomm should check struct xencomm_desc alignment.
- Xencomm should check whether struct xencomm_desc itself (8 bytes)
doesn't cross page boundary. Otherwise a hostile guest kernel can
pass such a pointer that may across page boundary. Then xencomm
accesses an unrelated page.
- Xencomm shouldn't access struct xencomm_desc::nr_addrs multiple
times. Copy it to local area and use the copy.
Otherwise a hostile guest can modify at the same time.
- Xencomm should check whether struct xencomm_desc::address[] array
crosses page boundary. Otherwise xencomm may access unrelated pages.
- Xencomm should get_page()/put_page() after address conversion from
paddr to maddr because xen supports SMP and balloon driver.
Otherwise another vcpu may free the page at the same time.
Such a domain behaviour doesn't make sense, however nothing prevents
it.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author kfraser@localhost.localdomain
date Tue Aug 28 15:31:56 2007 +0100 (2007-08-28)
parents 58d131f1fb35
children c93e2a822d6f
files xen/common/xencomm.c
line diff
     1.1 --- a/xen/common/xencomm.c	Fri Aug 24 16:32:56 2007 +0100
     1.2 +++ b/xen/common/xencomm.c	Tue Aug 28 15:31:56 2007 +0100
     1.3 @@ -34,9 +34,154 @@
     1.4  #endif
     1.5  
     1.6  static void*
     1.7 -xencomm_maddr_to_vaddr(unsigned long maddr)
     1.8 +xencomm_vaddr(unsigned long paddr, struct page_info *page)
     1.9  {
    1.10 -    return maddr ? maddr_to_virt(maddr) : NULL;
    1.11 +    return (void*)((paddr & ~PAGE_MASK) | (unsigned long)page_to_virt(page));
    1.12 +}
    1.13 +
    1.14 +/* get_page() to prevent from another vcpu freeing the page */
    1.15 +static int
    1.16 +xencomm_get_page(unsigned long paddr, struct page_info **page)
    1.17 +{
    1.18 +    unsigned long maddr = paddr_to_maddr(paddr);
    1.19 +    if ( maddr == 0 )
    1.20 +        return -EFAULT;
    1.21 +        
    1.22 +    *page = maddr_to_page(maddr);
    1.23 +    if ( get_page(*page, current->domain) == 0 )
    1.24 +    {
    1.25 +        if ( page_get_owner(*page) != current->domain )
    1.26 +        {
    1.27 +            /*
    1.28 +             * This page might be a page granted by another domain, or
    1.29 +             * this page is freed with decrease reservation hypercall at
    1.30 +             * the same time.
    1.31 +             */
    1.32 +            gdprintk(XENLOG_WARNING,
    1.33 +                     "bad page is passed. paddr 0x%lx maddr 0x%lx\n",
    1.34 +                     paddr, maddr);
    1.35 +            return -EFAULT;
    1.36 +        }
    1.37 +
    1.38 +        /* Try again. */
    1.39 +        cpu_relax();
    1.40 +        return -EAGAIN;
    1.41 +    }
    1.42 +
    1.43 +    return 0;
    1.44 +}
    1.45 +
    1.46 +/* check if struct desc doesn't cross page boundry */
    1.47 +static int
    1.48 +xencomm_desc_cross_page_boundary(unsigned long paddr)
    1.49 +{
    1.50 +    unsigned long offset = paddr & ~PAGE_MASK;
    1.51 +    if ( offset > PAGE_SIZE - sizeof(struct xencomm_desc) )
    1.52 +        return 1;
    1.53 +    return 0;
    1.54 +}
    1.55 +
    1.56 +struct xencomm_ctxt {
    1.57 +    struct xencomm_desc *desc;
    1.58 +
    1.59 +    uint32_t nr_addrs;
    1.60 +    struct page_info *page;
    1.61 +};
    1.62 +
    1.63 +static uint32_t
    1.64 +xencomm_ctxt_nr_addrs(const struct xencomm_ctxt *ctxt)
    1.65 +{
    1.66 +    return ctxt->nr_addrs;
    1.67 +}
    1.68 +
    1.69 +static unsigned long*
    1.70 +xencomm_ctxt_address(struct xencomm_ctxt *ctxt, int i)
    1.71 +{
    1.72 +    return &ctxt->desc->address[i];
    1.73 +}
    1.74 +
    1.75 +/* check if struct xencomm_desc and address array cross page boundary */
    1.76 +static int
    1.77 +xencomm_ctxt_cross_page_boundary(struct xencomm_ctxt *ctxt)
    1.78 +{
    1.79 +    unsigned long saddr = (unsigned long)ctxt->desc;
    1.80 +    unsigned long eaddr =
    1.81 +        (unsigned long)&ctxt->desc->address[ctxt->nr_addrs] - 1;
    1.82 +    if ( (saddr >> PAGE_SHIFT) != (eaddr >> PAGE_SHIFT) )
    1.83 +        return 1;
    1.84 +    return 0;
    1.85 +}
    1.86 +
    1.87 +static int
    1.88 +xencomm_ctxt_init(const void* handle, struct xencomm_ctxt *ctxt)
    1.89 +{
    1.90 +    struct page_info *page;
    1.91 +    struct xencomm_desc *desc;
    1.92 +    int ret;
    1.93 +
    1.94 +    /* avoid unaligned access */
    1.95 +    if ( (unsigned long)handle % __alignof__(*desc) != 0 )
    1.96 +        return -EINVAL;
    1.97 +    if ( xencomm_desc_cross_page_boundary((unsigned long)handle) )
    1.98 +        return -EINVAL;
    1.99 +
   1.100 +    /* first we need to access the descriptor */
   1.101 +    ret = xencomm_get_page((unsigned long)handle, &page);
   1.102 +    if ( ret )
   1.103 +        return ret;
   1.104 +
   1.105 +    desc = xencomm_vaddr((unsigned long)handle, page);
   1.106 +    if ( desc->magic != XENCOMM_MAGIC )
   1.107 +    {
   1.108 +        printk("%s: error: %p magic was 0x%x\n", __func__, desc, desc->magic);
   1.109 +        put_page(page);
   1.110 +        return -EINVAL;
   1.111 +    }
   1.112 +
   1.113 +    ctxt->desc = desc;
   1.114 +    ctxt->nr_addrs = desc->nr_addrs; /* copy before use.
   1.115 +                                      * It is possible for a guest domain to
   1.116 +                                      * modify concurrently.
   1.117 +                                      */
   1.118 +    ctxt->page = page;
   1.119 +    if ( xencomm_ctxt_cross_page_boundary(ctxt) )
   1.120 +    {
   1.121 +        put_page(page);
   1.122 +        return -EINVAL;
   1.123 +    }
   1.124 +    return 0;
   1.125 +}
   1.126 +
   1.127 +static void
   1.128 +xencomm_ctxt_done(struct xencomm_ctxt *ctxt)
   1.129 +{
   1.130 +    put_page(ctxt->page);
   1.131 +}
   1.132 +
   1.133 +static int
   1.134 +xencomm_copy_chunk_from(
   1.135 +    unsigned long to, unsigned long paddr, unsigned int  len)
   1.136 +{
   1.137 +    struct page_info *page;
   1.138 +
   1.139 +    while (1)
   1.140 +    {
   1.141 +        int res;
   1.142 +        res = xencomm_get_page(paddr, &page);
   1.143 +        if ( res != 0 )
   1.144 +        {
   1.145 +            if ( res == -EAGAIN )
   1.146 +                continue; /* Try again. */
   1.147 +            return res;
   1.148 +        }
   1.149 +        xc_dprintk("%lx[%d] -> %lx\n",
   1.150 +                   (unsigned long)xencomm_vaddr(paddr, page), len, to);
   1.151 +
   1.152 +        memcpy((void *)to, xencomm_vaddr(paddr, page), len);
   1.153 +        put_page(page);
   1.154 +        return 0;
   1.155 +    }
   1.156 +    /* NOTREACHED */
   1.157  }
   1.158  
   1.159  static unsigned long
   1.160 @@ -48,14 +193,12 @@ xencomm_inline_from_guest(
   1.161      while ( n > 0 )
   1.162      {
   1.163          unsigned int chunksz, bytes;
   1.164 -        unsigned long src_maddr;
   1.165  
   1.166          chunksz = PAGE_SIZE - (src_paddr % PAGE_SIZE);
   1.167          bytes   = min(chunksz, n);
   1.168  
   1.169 -        src_maddr = paddr_to_maddr(src_paddr);
   1.170 -        xc_dprintk("%lx[%d] -> %lx\n", src_maddr, bytes, (unsigned long)to);
   1.171 -        memcpy(to, maddr_to_virt(src_maddr), bytes);
   1.172 +        if ( xencomm_copy_chunk_from((unsigned long)to, src_paddr, bytes) )
   1.173 +            return n;
   1.174          src_paddr += bytes;
   1.175          to += bytes;
   1.176          n -= bytes;
   1.177 @@ -81,7 +224,7 @@ unsigned long
   1.178  xencomm_copy_from_guest(
   1.179      void *to, const void *from, unsigned int n, unsigned int skip)
   1.180  {
   1.181 -    struct xencomm_desc *desc;
   1.182 +    struct xencomm_ctxt ctxt;
   1.183      unsigned int from_pos = 0;
   1.184      unsigned int to_pos = 0;
   1.185      unsigned int i = 0;
   1.186 @@ -89,26 +232,14 @@ xencomm_copy_from_guest(
   1.187      if ( xencomm_is_inline(from) )
   1.188          return xencomm_inline_from_guest(to, from, n, skip);
   1.189  
   1.190 -    /* First we need to access the descriptor. */
   1.191 -    desc = (struct xencomm_desc *)
   1.192 -        xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)from));
   1.193 -    if ( desc == NULL )
   1.194 +    if ( xencomm_ctxt_init(from, &ctxt) )
   1.195          return n;
   1.196  
   1.197 -    if ( desc->magic != XENCOMM_MAGIC )
   1.198 +    /* Iterate through the descriptor, copying up to a page at a time */
   1.199 +    while ( (to_pos < n) && (i < xencomm_ctxt_nr_addrs(&ctxt)) )
   1.200      {
   1.201 -        printk("%s: error: %p magic was 0x%x\n",
   1.202 -               __func__, desc, desc->magic);
   1.203 -        return n;
   1.204 -    }
   1.205 -
   1.206 -    /* Iterate through the descriptor, copying up to a page at a time. */
   1.207 -    while ( (to_pos < n) && (i < desc->nr_addrs) )
   1.208 -    {
   1.209 -        unsigned long src_paddr = desc->address[i];
   1.210 -        unsigned int pgoffset;
   1.211 -        unsigned int chunksz;
   1.212 -        unsigned int chunk_skip;
   1.213 +        unsigned long src_paddr = *xencomm_ctxt_address(&ctxt, i);
   1.214 +        unsigned int pgoffset, chunksz, chunk_skip;
   1.215  
   1.216          if ( src_paddr == XENCOMM_INVALID )
   1.217          {
   1.218 @@ -124,18 +255,13 @@ xencomm_copy_from_guest(
   1.219          chunksz -= chunk_skip;
   1.220          skip -= chunk_skip;
   1.221  
   1.222 -        if ( (skip == 0) && (chunksz > 0) )
   1.223 +        if ( skip == 0 && chunksz > 0 )
   1.224          {
   1.225 -            unsigned long src_maddr;
   1.226 -            unsigned long dest = (unsigned long)to + to_pos;
   1.227              unsigned int bytes = min(chunksz, n - to_pos);
   1.228  
   1.229 -            src_maddr = paddr_to_maddr(src_paddr + chunk_skip);
   1.230 -            if ( src_maddr == 0 )
   1.231 -                return n - to_pos;
   1.232 -
   1.233 -            xc_dprintk("%lx[%d] -> %lx\n", src_maddr, bytes, dest);
   1.234 -            memcpy((void *)dest, maddr_to_virt(src_maddr), bytes);
   1.235 +            if ( xencomm_copy_chunk_from((unsigned long)to + to_pos,
   1.236 +                                         src_paddr + chunk_skip, bytes) )
   1.237 +                goto out;
   1.238              from_pos += bytes;
   1.239              to_pos += bytes;
   1.240          }
   1.241 @@ -143,9 +269,37 @@ xencomm_copy_from_guest(
   1.242          i++;
   1.243      }
   1.244  
   1.245 +out:
   1.246 +    xencomm_ctxt_done(&ctxt);
   1.247      return n - to_pos;
   1.248  }
   1.249  
   1.250 +static int
   1.251 +xencomm_copy_chunk_to(
   1.252 +    unsigned long paddr, unsigned long from, unsigned int  len)
   1.253 +{
   1.254 +    struct page_info *page;
   1.255 +
   1.256 +    while (1)
   1.257 +    {
   1.258 +        int res;
   1.259 +        res = xencomm_get_page(paddr, &page);
   1.260 +        if ( res != 0 )
   1.261 +        {
   1.262 +            if ( res == -EAGAIN )
   1.263 +                continue; /* Try again.  */
   1.264 +            return res;
   1.265 +        }
   1.266 +        xc_dprintk("%lx[%d] -> %lx\n", from, len,
   1.267 +                   (unsigned long)xencomm_vaddr(paddr, page));
   1.268 +
   1.269 +        memcpy(xencomm_vaddr(paddr, page), (void *)from, len);
   1.270 +        put_page(page);
   1.271 +        return 0;
   1.272 +    }
   1.273 +    /* NOTREACHED */
   1.274 +}
   1.275 +
   1.276  static unsigned long
   1.277  xencomm_inline_to_guest(
   1.278      void *to, const void *from, unsigned int n, unsigned int skip)
   1.279 @@ -155,14 +309,12 @@ xencomm_inline_to_guest(
   1.280      while ( n > 0 )
   1.281      {
   1.282          unsigned int chunksz, bytes;
   1.283 -        unsigned long dest_maddr;
   1.284  
   1.285          chunksz = PAGE_SIZE - (dest_paddr % PAGE_SIZE);
   1.286          bytes   = min(chunksz, n);
   1.287  
   1.288 -        dest_maddr = paddr_to_maddr(dest_paddr);
   1.289 -        xc_dprintk("%lx[%d] -> %lx\n", (unsigned long)from, bytes, dest_maddr);
   1.290 -        memcpy(maddr_to_virt(dest_maddr), (void *)from, bytes);
   1.291 +        if ( xencomm_copy_chunk_to(dest_paddr, (unsigned long)from, bytes) )
   1.292 +            return n;
   1.293          dest_paddr += bytes;
   1.294          from += bytes;
   1.295          n -= bytes;
   1.296 @@ -188,7 +340,7 @@ unsigned long
   1.297  xencomm_copy_to_guest(
   1.298      void *to, const void *from, unsigned int n, unsigned int skip)
   1.299  {
   1.300 -    struct xencomm_desc *desc;
   1.301 +    struct xencomm_ctxt ctxt;
   1.302      unsigned int from_pos = 0;
   1.303      unsigned int to_pos = 0;
   1.304      unsigned int i = 0;
   1.305 @@ -196,22 +348,13 @@ xencomm_copy_to_guest(
   1.306      if ( xencomm_is_inline(to) )
   1.307          return xencomm_inline_to_guest(to, from, n, skip);
   1.308  
   1.309 -    /* First we need to access the descriptor. */
   1.310 -    desc = (struct xencomm_desc *)
   1.311 -        xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)to));
   1.312 -    if ( desc == NULL )
   1.313 +    if ( xencomm_ctxt_init(to, &ctxt) )
   1.314          return n;
   1.315  
   1.316 -    if ( desc->magic != XENCOMM_MAGIC )
   1.317 +    /* Iterate through the descriptor, copying up to a page at a time */
   1.318 +    while ( (from_pos < n) && (i < xencomm_ctxt_nr_addrs(&ctxt)) )
   1.319      {
   1.320 -        printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
   1.321 -        return n;
   1.322 -    }
   1.323 -
   1.324 -    /* Iterate through the descriptor, copying up to a page at a time. */
   1.325 -    while ( (from_pos < n) && (i < desc->nr_addrs) )
   1.326 -    {
   1.327 -        unsigned long dest_paddr = desc->address[i];
   1.328 +        unsigned long dest_paddr = *xencomm_ctxt_address(&ctxt, i);
   1.329          unsigned int pgoffset, chunksz, chunk_skip;
   1.330  
   1.331          if ( dest_paddr == XENCOMM_INVALID )
   1.332 @@ -228,18 +371,13 @@ xencomm_copy_to_guest(
   1.333          chunksz -= chunk_skip;
   1.334          skip -= chunk_skip;
   1.335  
   1.336 -        if ( (skip == 0) && (chunksz > 0) )
   1.337 +        if ( skip == 0 && chunksz > 0 )
   1.338          {
   1.339 -            unsigned long dest_maddr;
   1.340 -            unsigned long source = (unsigned long)from + from_pos;
   1.341              unsigned int bytes = min(chunksz, n - from_pos);
   1.342  
   1.343 -            dest_maddr = paddr_to_maddr(dest_paddr + chunk_skip);
   1.344 -            if ( dest_maddr == 0 )
   1.345 -                return n - from_pos;
   1.346 -
   1.347 -            xc_dprintk("%lx[%d] -> %lx\n", source, bytes, dest_maddr);
   1.348 -            memcpy(maddr_to_virt(dest_maddr), (void *)source, bytes);
   1.349 +            if ( xencomm_copy_chunk_to(dest_paddr + chunk_skip,
   1.350 +                                      (unsigned long)from + from_pos, bytes) )
   1.351 +                goto out;
   1.352              from_pos += bytes;
   1.353              to_pos += bytes;
   1.354          }
   1.355 @@ -247,6 +385,8 @@ xencomm_copy_to_guest(
   1.356          i++;
   1.357      }
   1.358  
   1.359 +out:
   1.360 +    xencomm_ctxt_done(&ctxt);
   1.361      return n - from_pos;
   1.362  }
   1.363  
   1.364 @@ -260,29 +400,23 @@ static int xencomm_inline_add_offset(voi
   1.365   * exhausted pages to XENCOMM_INVALID. */
   1.366  int xencomm_add_offset(void **handle, unsigned int bytes)
   1.367  {
   1.368 -    struct xencomm_desc *desc;
   1.369 +    struct xencomm_ctxt ctxt;
   1.370      int i = 0;
   1.371  
   1.372      if ( xencomm_is_inline(*handle) )
   1.373          return xencomm_inline_add_offset(handle, bytes);
   1.374  
   1.375 -    /* First we need to access the descriptor. */
   1.376 -    desc = (struct xencomm_desc *)
   1.377 -        xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)*handle));
   1.378 -    if ( desc == NULL )
   1.379 +    if ( xencomm_ctxt_init(handle, &ctxt) )
   1.380          return -1;
   1.381  
   1.382 -    if ( desc->magic != XENCOMM_MAGIC )
   1.383 +    /* Iterate through the descriptor incrementing addresses */
   1.384 +    while ( (bytes > 0) && (i < xencomm_ctxt_nr_addrs(&ctxt)) )
   1.385      {
   1.386 -        printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
   1.387 -        return -1;
   1.388 -    }
   1.389 -
   1.390 -    /* Iterate through the descriptor incrementing addresses. */
   1.391 -    while ( (bytes > 0) && (i < desc->nr_addrs) )
   1.392 -    {
   1.393 -        unsigned long dest_paddr = desc->address[i];
   1.394 -        unsigned int pgoffset, chunksz, chunk_skip;
   1.395 +        unsigned long *address = xencomm_ctxt_address(&ctxt, i);
   1.396 +        unsigned long dest_paddr = *address;
   1.397 +        unsigned int pgoffset;
   1.398 +        unsigned int chunksz;
   1.399 +        unsigned int chunk_skip;
   1.400  
   1.401          if ( dest_paddr == XENCOMM_INVALID )
   1.402          {
   1.403 @@ -295,33 +429,39 @@ int xencomm_add_offset(void **handle, un
   1.404  
   1.405          chunk_skip = min(chunksz, bytes);
   1.406          if ( chunk_skip == chunksz )
   1.407 -            desc->address[i] = XENCOMM_INVALID; /* exchausted this page */
   1.408 +            *address = XENCOMM_INVALID; /* exhausted this page */
   1.409          else
   1.410 -            desc->address[i] += chunk_skip;
   1.411 +            *address += chunk_skip;
   1.412          bytes -= chunk_skip;
   1.413  
   1.414          i++;
   1.415      }
   1.416 -
   1.417 +    xencomm_ctxt_done(&ctxt);
   1.418      return 0;
   1.419  }
   1.420  
   1.421  int xencomm_handle_is_null(void *handle)
   1.422  {
   1.423 -    struct xencomm_desc *desc;
   1.424 +    struct xencomm_ctxt ctxt;
   1.425      int i;
   1.426 +    int res = 1;
   1.427  
   1.428      if ( xencomm_is_inline(handle) )
   1.429          return xencomm_inline_addr(handle) == 0;
   1.430  
   1.431 -    desc = (struct xencomm_desc *)
   1.432 -        xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)handle));
   1.433 -    if ( desc == NULL )
   1.434 +    if ( xencomm_ctxt_init(handle, &ctxt) )
   1.435          return 1;
   1.436  
   1.437 -    for ( i = 0; i < desc->nr_addrs; i++ )
   1.438 -        if ( desc->address[i] != XENCOMM_INVALID )
   1.439 -            return 0;
   1.440 +    for ( i = 0; i < xencomm_ctxt_nr_addrs(&ctxt); i++ )
   1.441 +    {
   1.442 +        if ( *xencomm_ctxt_address(&ctxt, i) != XENCOMM_INVALID )
   1.443 +        {
   1.444 +            res = 0;
   1.445 +            goto out;
   1.446 +        }
   1.447 +    }
   1.448  
   1.449 -    return 1;
   1.450 +out:
   1.451 +    xencomm_ctxt_done(&ctxt);
   1.452 +    return res;
   1.453  }