ia64/xen-unstable

view xen/common/grant_table.c @ 6832:5959fae4722a

Set the NE bit in VMX guest CR0. The VMCS guest CR0.NE bit must be
set; otherwise VM entry fails with a "vm-entry failed" error.

Signed-off-by: Chengyuan Li <chengyuan.li@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed Sep 14 13:37:50 2005 +0000 (2005-09-14)
parents cdfa7dd00c44
children b2f4823b6ff0 b35215021b32 9af349b055e5 3233e7ecfa9f

/******************************************************************************
 * common/grant_table.c
 *
 * Mechanism for granting foreign access to page frames, and receiving
 * page-ownership transfers.
 *
 * Copyright (c) 2005 Christopher Clark
 * Copyright (c) 2004 K A Fraser
 * Copyright (c) 2005 Andrew Warfield
 * Modifications by Geoffrey Lefebvre are (c) Intel Research Cambridge
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#define GRANT_DEBUG 0
#define GRANT_DEBUG_VERBOSE 0

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/shadow.h>
#include <xen/mm.h>
#include <acm/acm_hooks.h>
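
/*
 * Base PTE flags used when a grant mapping is written into a guest
 * pagetable. On x86_64 _PAGE_USER is included unconditionally, presumably
 * because 64-bit PV guest kernels do not execute in ring 0; elsewhere it is
 * only added when the mapper asks for GNTMAP_application_map (see
 * __gnttab_activate_grant_ref below).
 */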
#if defined(CONFIG_X86_64)
#define GRANT_PTE_FLAGS (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#else
#define GRANT_PTE_FLAGS (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY)
#endif

#define PIN_FAIL(_lbl, _rc, _f, _a...)          \
    do {                                        \
        DPRINTK( _f, ## _a );                   \
        rc = (_rc);                             \
        goto _lbl;                              \
    } while ( 0 )
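
/*
 * Maptrack handle allocation: unused maptrack entries are chained into a
 * free list through the upper bits of their ref_and_flags field (the next
 * free index is stored above MAPTRACK_REF_SHIFT), with maptrack_head
 * pointing at the first free slot. Allocating and freeing a handle is
 * therefore O(1) and needs no extra storage.
 */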
static inline int
get_maptrack_handle(
    grant_table_t *t)
{
    unsigned int h;
    if ( unlikely((h = t->maptrack_head) == (t->maptrack_limit - 1)) )
        return -1;
    t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT;
    t->map_count++;
    return h;
}

static inline void
put_maptrack_handle(
    grant_table_t *t, int handle)
{
    t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT;
    t->maptrack_head = handle;
    t->map_count--;
}

static int
__gnttab_activate_grant_ref(
    struct domain          *mapping_d,          /* IN */
    struct vcpu            *mapping_ed,
    struct domain          *granting_d,
    grant_ref_t             ref,
    u16                     dev_hst_ro_flags,
    u64                     addr,
    unsigned long          *pframe )            /* OUT */
{
    domid_t               sdom;
    u16                   sflags;
    active_grant_entry_t *act;
    grant_entry_t        *sha;
    s16                   rc = 1;
    unsigned long         frame = 0;
    int                   retries = 0;

    /*
     * Objectives of this function:
     * . Make the record ( granting_d, ref ) active, if not already.
     * . Update shared grant entry of owner, indicating frame is mapped.
     * . Increment the owner act->pin reference counts.
     * . get_page on shared frame if new mapping.
     * . get_page_type if this is first RW mapping of frame.
     * . Add PTE to virtual address space of mapping_d, if necessary.
     * Returns:
     * . -ve: error
     * .  1: ok
     * .  0: ok and TLB invalidate of host_addr needed.
     *
     * On success, *pframe contains mfn.
     */

    /*
     * We bound the number of times we retry CMPXCHG on memory locations that
     * we share with a guest OS. The reason is that the guest can modify that
     * location at a higher rate than we can read-modify-CMPXCHG, so the guest
     * could cause us to livelock. There are a few cases where it is valid for
     * the guest to race our updates (e.g., to change the GTF_readonly flag),
     * so we allow a few retries before failing.
     */

    act = &granting_d->grant_table->active[ref];
    sha = &granting_d->grant_table->shared[ref];

    spin_lock(&granting_d->grant_table->lock);

    if ( act->pin == 0 )
    {
        /* CASE 1: Activating a previously inactive entry. */

        sflags = sha->flags;
        sdom   = sha->domid;

        /* This loop attempts to set the access (reading/writing) flags
         * in the grant table entry. It tries a cmpxchg on the field
         * up to five times, and then fails under the assumption that
         * the guest is misbehaving. */
        for ( ; ; )
        {
            u32 scombo, prev_scombo, new_scombo;

            if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
                 unlikely(sdom != mapping_d->domain_id) )
                PIN_FAIL(unlock_out, GNTST_general_error,
                         "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
                         sflags, sdom, mapping_d->domain_id);

            /* Merge two 16-bit values into a 32-bit combined update. */
            /* NB. Endianness! */
            prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;

            new_scombo = scombo | GTF_reading;
            if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
            {
                new_scombo |= GTF_writing;
                if ( unlikely(sflags & GTF_readonly) )
                    PIN_FAIL(unlock_out, GNTST_general_error,
                             "Attempt to write-pin a r/o grant entry.\n");
            }

            /* NB. prev_scombo is updated in place to seen value. */
            if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
                                       prev_scombo,
                                       new_scombo)) )
                PIN_FAIL(unlock_out, GNTST_general_error,
                         "Fault while modifying shared flags and domid.\n");

            /* Did the combined update work (did we see what we expected?). */
            if ( likely(prev_scombo == scombo) )
                break;

            if ( retries++ == 4 )
                PIN_FAIL(unlock_out, GNTST_general_error,
                         "Shared grant entry is unstable.\n");

            /* Didn't see what we expected. Split out the seen flags & dom. */
            /* NB. Endianness! */
            sflags = (u16)prev_scombo;
            sdom   = (u16)(prev_scombo >> 16);
        }

        /* rmb(); */ /* not on x86 */

        frame = __gpfn_to_mfn_foreign(granting_d, sha->frame);

        if ( unlikely(!pfn_valid(frame)) ||
             unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ?
                        get_page(&frame_table[frame], granting_d) :
                        get_page_and_type(&frame_table[frame], granting_d,
                                          PGT_writable_page))) )
        {
            clear_bit(_GTF_writing, &sha->flags);
            clear_bit(_GTF_reading, &sha->flags);
            PIN_FAIL(unlock_out, GNTST_general_error,
                     "Could not pin the granted frame (%lx)!\n", frame);
        }

        if ( dev_hst_ro_flags & GNTMAP_device_map )
            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
                GNTPIN_devr_inc : GNTPIN_devw_inc;
        if ( dev_hst_ro_flags & GNTMAP_host_map )
            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
        act->domid = sdom;
        act->frame = frame;
    }
    else
    {
        /* CASE 2: Active modifications to an already active entry. */

        /*
         * A cheesy check for possible pin-count overflow.
         * A more accurate check cannot be done with a single comparison.
         */
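        /*
         * The pin field holds separate read/write sub-counts for host and
         * device mappings (incremented via the GNTPIN_*_inc values below),
         * apparently one byte each: testing the top bit of every byte
         * rejects any sub-count that has already reached 0x80, leaving
         * headroom before a byte could wrap into its neighbour.
         */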
        if ( (act->pin & 0x80808080U) != 0 )
            PIN_FAIL(unlock_out, ENOSPC,
                     "Risk of counter overflow %08x\n", act->pin);

        frame = act->frame;

        if ( !(dev_hst_ro_flags & GNTMAP_readonly) &&
             !((sflags = sha->flags) & GTF_writing) )
        {
            for ( ; ; )
            {
                u16 prev_sflags;

                if ( unlikely(sflags & GTF_readonly) )
                    PIN_FAIL(unlock_out, GNTST_general_error,
                             "Attempt to write-pin a r/o grant entry.\n");

                prev_sflags = sflags;

                /* NB. prev_sflags is updated in place to seen value. */
                if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags,
                                           prev_sflags | GTF_writing)) )
                    PIN_FAIL(unlock_out, GNTST_general_error,
                             "Fault while modifying shared flags.\n");

                if ( likely(prev_sflags == sflags) )
                    break;

                if ( retries++ == 4 )
                    PIN_FAIL(unlock_out, GNTST_general_error,
                             "Shared grant entry is unstable.\n");

                sflags = prev_sflags;
            }

            if ( unlikely(!get_page_type(&frame_table[frame],
                                         PGT_writable_page)) )
            {
                clear_bit(_GTF_writing, &sha->flags);
                PIN_FAIL(unlock_out, GNTST_general_error,
                         "Attempt to write-pin an unwritable page.\n");
            }
        }

        if ( dev_hst_ro_flags & GNTMAP_device_map )
            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
                GNTPIN_devr_inc : GNTPIN_devw_inc;

        if ( dev_hst_ro_flags & GNTMAP_host_map )
            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
    }

    /*
     * At this point:
     * act->pin updated to reference count mappings.
     * sha->flags updated to indicate to granting domain mapping done.
     * frame contains the mfn.
     */

    spin_unlock(&granting_d->grant_table->lock);

    if ( (addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
    {
        /* Write update into the pagetable. */
        l1_pgentry_t pte;
        pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);

        if ( (dev_hst_ro_flags & GNTMAP_application_map) )
            l1e_add_flags(pte,_PAGE_USER);
        if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
            l1e_add_flags(pte,_PAGE_RW);

        if ( dev_hst_ro_flags & GNTMAP_contains_pte )
            rc = update_grant_pte_mapping(addr, pte, mapping_d, mapping_ed);
        else
            rc = update_grant_va_mapping(addr, pte, mapping_d, mapping_ed);

        /* IMPORTANT: rc indicates the degree of TLB flush that is required.
         * GNTST_flush_one (1) or GNTST_flush_all (2). This is done in the
         * outer gnttab_map_grant_ref. */
        if ( rc < 0 )
        {
            /* Failure: undo and abort. */

            spin_lock(&granting_d->grant_table->lock);

            if ( dev_hst_ro_flags & GNTMAP_readonly )
            {
                act->pin -= GNTPIN_hstr_inc;
            }
            else
            {
                act->pin -= GNTPIN_hstw_inc;
                if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
                {
                    clear_bit(_GTF_writing, &sha->flags);
                    put_page_type(&frame_table[frame]);
                }
            }

            if ( act->pin == 0 )
            {
                clear_bit(_GTF_reading, &sha->flags);
                put_page(&frame_table[frame]);
            }

            spin_unlock(&granting_d->grant_table->lock);
        }

    }

    *pframe = frame;
    return rc;

 unlock_out:
    spin_unlock(&granting_d->grant_table->lock);
    return rc;
}

/*
 * Returns 0 if TLB flush / invalidate required by caller.
 * va will indicate the address to be invalidated.
 *
 * addr is _either_ a host virtual address, or the address of the pte to
 * update, as indicated by the GNTMAP_contains_pte flag.
 */
static int
__gnttab_map_grant_ref(
    gnttab_map_grant_ref_t *uop,
    unsigned long *va)
{
    domid_t               dom;
    grant_ref_t           ref;
    struct domain        *ld, *rd;
    struct vcpu          *led;
    u16                   dev_hst_ro_flags;
    int                   handle;
    u64                   addr;
    unsigned long         frame = 0;
    int                   rc;

    led = current;
    ld = led->domain;

    /* Bitwise-OR avoids short-circuiting which screws control flow. */
    if ( unlikely(__get_user(dom, &uop->dom) |
                  __get_user(ref, &uop->ref) |
                  __get_user(addr, &uop->host_addr) |
                  __get_user(dev_hst_ro_flags, &uop->flags)) )
    {
        DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
        return -EFAULT; /* don't set status */
    }

    if ( (dev_hst_ro_flags & GNTMAP_host_map) &&
         ( (addr == 0) ||
           (!(dev_hst_ro_flags & GNTMAP_contains_pte) &&
            unlikely(!__addr_ok(addr))) ) )
    {
        DPRINTK("Bad virtual address (%"PRIx64") or flags (%"PRIx16").\n",
                addr, dev_hst_ro_flags);
        (void)__put_user(GNTST_bad_virt_addr, &uop->handle);
        return GNTST_bad_gntref;
    }

    if ( unlikely(ref >= NR_GRANT_ENTRIES) ||
         unlikely((dev_hst_ro_flags &
                   (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
    {
        DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags);
        (void)__put_user(GNTST_bad_gntref, &uop->handle);
        return GNTST_bad_gntref;
    }

    if (acm_pre_grant_map_ref(dom)) {
        (void)__put_user(GNTST_permission_denied, &uop->handle);
        return GNTST_permission_denied;
    }

    if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
         unlikely(ld == rd) )
    {
        if ( rd != NULL )
            put_domain(rd);
        DPRINTK("Could not find domain %d\n", dom);
        (void)__put_user(GNTST_bad_domain, &uop->handle);
        return GNTST_bad_domain;
    }

    /* Get a maptrack handle. */
    if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
    {
        int              i;
        grant_mapping_t *new_mt;
        grant_table_t   *lgt = ld->grant_table;

        if ( (lgt->maptrack_limit << 1) > MAPTRACK_MAX_ENTRIES )
        {
            put_domain(rd);
            DPRINTK("Maptrack table is at maximum size.\n");
            (void)__put_user(GNTST_no_device_space, &uop->handle);
            return GNTST_no_device_space;
        }

        /* Grow the maptrack table. */
        new_mt = alloc_xenheap_pages(lgt->maptrack_order + 1);
        if ( new_mt == NULL )
        {
            put_domain(rd);
            DPRINTK("No more map handles available.\n");
            (void)__put_user(GNTST_no_device_space, &uop->handle);
            return GNTST_no_device_space;
        }

        memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order);
        for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ )
            new_mt[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;

        free_xenheap_pages(lgt->maptrack, lgt->maptrack_order);
        lgt->maptrack = new_mt;
        lgt->maptrack_order += 1;
        lgt->maptrack_limit <<= 1;

        DPRINTK("Doubled maptrack size\n");
        handle = get_maptrack_handle(ld->grant_table);
    }

#if GRANT_DEBUG_VERBOSE
    DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n",
            ref, dom, dev_hst_ro_flags);
#endif

    if ( (rc = __gnttab_activate_grant_ref(ld, led, rd, ref, dev_hst_ro_flags,
                                           addr, &frame)) >= 0 )
    {
        /*
         * Only make the maptrack live _after_ writing the pte, in case we
         * overwrite the same frame number, causing a maptrack walk to find it
         */
        ld->grant_table->maptrack[handle].domid = dom;

        ld->grant_table->maptrack[handle].ref_and_flags
            = (ref << MAPTRACK_REF_SHIFT) |
              (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK);

        (void)__put_user((u64)frame << PAGE_SHIFT, &uop->dev_bus_addr);

        if ( ( dev_hst_ro_flags & GNTMAP_host_map ) &&
             !( dev_hst_ro_flags & GNTMAP_contains_pte) )
            *va = addr;

        (void)__put_user(handle, &uop->handle);
    }
    else
    {
        (void)__put_user(rc, &uop->handle);
        put_maptrack_handle(ld->grant_table, handle);
    }

    put_domain(rd);
    return rc;
}
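
/*
 * Batch handler for GNTTABOP_map_grant_ref. The per-op return values are
 * accumulated in 'flush': a total of exactly one triggers a single-address
 * TLB invalidation of the address returned in 'va', while any other
 * non-zero total causes a full TLB flush on the domain's CPUs.
 */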
static long
gnttab_map_grant_ref(
    gnttab_map_grant_ref_t *uop, unsigned int count)
{
    int i, rc, flush = 0;
    unsigned long va = 0;

    for ( i = 0; i < count; i++ )
        if ( (rc = __gnttab_map_grant_ref(&uop[i], &va)) >= 0 )
            flush += rc;

    if ( flush == 1 )
        flush_tlb_one_mask(current->domain->cpumask, va);
    else if ( flush != 0 )
        flush_tlb_mask(current->domain->cpumask);

    return 0;
}
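
/*
 * Tear down a mapping previously established by __gnttab_map_grant_ref,
 * identified by its maptrack handle. A non-zero dev_bus_addr must match the
 * granted frame and causes the device mapping to be released; a non-zero
 * host_addr releases the host (pagetable) mapping. Page references and the
 * shared-entry GTF_reading/GTF_writing bits are dropped as the last pins of
 * each kind disappear.
 */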
static int
__gnttab_unmap_grant_ref(
    gnttab_unmap_grant_ref_t *uop,
    unsigned long *va)
{
    domid_t          dom;
    grant_ref_t      ref;
    u16              handle;
    struct domain   *ld, *rd;
    active_grant_entry_t *act;
    grant_entry_t   *sha;
    grant_mapping_t *map;
    u16              flags;
    s16              rc = 1;
    u64              addr, dev_bus_addr;
    unsigned long    frame;

    ld = current->domain;

    /* Bitwise-OR avoids short-circuiting which screws control flow. */
    if ( unlikely(__get_user(addr, &uop->host_addr) |
                  __get_user(dev_bus_addr, &uop->dev_bus_addr) |
                  __get_user(handle, &uop->handle)) )
    {
        DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n");
        return -EFAULT; /* don't set status */
    }

    frame = (unsigned long)(dev_bus_addr >> PAGE_SHIFT);

    map = &ld->grant_table->maptrack[handle];

    if ( unlikely(handle >= ld->grant_table->maptrack_limit) ||
         unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) )
    {
        DPRINTK("Bad handle (%d).\n", handle);
        (void)__put_user(GNTST_bad_handle, &uop->status);
        return GNTST_bad_handle;
    }

    dom   = map->domid;
    ref   = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
    flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK;

    if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
         unlikely(ld == rd) )
    {
        if ( rd != NULL )
            put_domain(rd);
        DPRINTK("Could not find domain %d\n", dom);
        (void)__put_user(GNTST_bad_domain, &uop->status);
        return GNTST_bad_domain;
    }

#if GRANT_DEBUG_VERBOSE
    DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n",
            ref, dom, handle);
#endif

    act = &rd->grant_table->active[ref];
    sha = &rd->grant_table->shared[ref];

    spin_lock(&rd->grant_table->lock);

    if ( frame == 0 )
    {
        frame = act->frame;
    }
    else
    {
        if ( unlikely(frame != act->frame) )
            PIN_FAIL(unmap_out, GNTST_general_error,
                     "Bad frame number doesn't match gntref.\n");
        if ( flags & GNTMAP_device_map )
            act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
                                                  : GNTPIN_devw_inc;

        map->ref_and_flags &= ~GNTMAP_device_map;
        (void)__put_user(0, &uop->dev_bus_addr);

        /* Frame is now unmapped for device access. */
    }

    if ( (addr != 0) &&
         (flags & GNTMAP_host_map) &&
         ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0))
    {
        if ( flags & GNTMAP_contains_pte )
        {
            if ( (rc = clear_grant_pte_mapping(addr, frame, ld)) < 0 )
                goto unmap_out;
        }
        else
        {
            if ( (rc = clear_grant_va_mapping(addr, frame)) < 0 )
                goto unmap_out;
        }

        map->ref_and_flags &= ~GNTMAP_host_map;

        act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
                                              : GNTPIN_hstw_inc;

        rc = 0;
        if ( !( flags & GNTMAP_contains_pte) )
            *va = addr;
    }

    if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0)
    {
        map->ref_and_flags = 0;
        put_maptrack_handle(ld->grant_table, handle);
    }

    /* If just unmapped a writable mapping, mark as dirtied */
    if ( unlikely(shadow_mode_log_dirty(rd)) &&
         !( flags & GNTMAP_readonly ) )
        mark_dirty(rd, frame);

    /* If the last writable mapping has been removed, put_page_type */
    if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) &&
         ( !( flags & GNTMAP_readonly ) ) )
    {
        clear_bit(_GTF_writing, &sha->flags);
        put_page_type(&frame_table[frame]);
    }

    if ( act->pin == 0 )
    {
        act->frame = 0xdeadbeef;
        clear_bit(_GTF_reading, &sha->flags);
        put_page(&frame_table[frame]);
    }

 unmap_out:
    (void)__put_user(rc, &uop->status);
    spin_unlock(&rd->grant_table->lock);
    put_domain(rd);
    return rc;
}

static long
gnttab_unmap_grant_ref(
    gnttab_unmap_grant_ref_t *uop, unsigned int count)
{
    int i, flush = 0;
    unsigned long va = 0;

    for ( i = 0; i < count; i++ )
        if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0 )
            flush++;

    if ( flush == 1 )
        flush_tlb_one_mask(current->domain->cpumask, va);
    else if ( flush != 0 )
        flush_tlb_mask(current->domain->cpumask);

    return 0;
}
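
/*
 * GNTTABOP_setup_table: return to the guest the machine frame numbers of
 * its shared grant-table pages so it can map them. Only a privileged
 * domain may set up the table of a domain other than itself.
 */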
static long
gnttab_setup_table(
    gnttab_setup_table_t *uop, unsigned int count)
{
    gnttab_setup_table_t  op;
    struct domain        *d;
    int                   i;

    if ( count != 1 )
        return -EINVAL;

    if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
    {
        DPRINTK("Fault while reading gnttab_setup_table_t.\n");
        return -EFAULT;
    }

    if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) )
    {
        DPRINTK("Xen only supports up to %d grant-table frames per domain.\n",
                NR_GRANT_FRAMES);
        (void)put_user(GNTST_general_error, &uop->status);
        return 0;
    }

    if ( op.dom == DOMID_SELF )
    {
        op.dom = current->domain->domain_id;
    }
    else if ( unlikely(!IS_PRIV(current->domain)) )
    {
        (void)put_user(GNTST_permission_denied, &uop->status);
        return 0;
    }

    if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
    {
        DPRINTK("Bad domid %d.\n", op.dom);
        (void)put_user(GNTST_bad_domain, &uop->status);
        return 0;
    }

    if ( op.nr_frames <= NR_GRANT_FRAMES )
    {
        ASSERT(d->grant_table != NULL);
        (void)put_user(GNTST_okay, &uop->status);
        for ( i = 0; i < op.nr_frames; i++ )
            (void)put_user(
                (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i,
                &uop->frame_list[i]);
    }

    put_domain(d);
    return 0;
}

#if GRANT_DEBUG
static int
gnttab_dump_table(gnttab_dump_table_t *uop)
{
    grant_table_t        *gt;
    gnttab_dump_table_t   op;
    struct domain        *d;
    u32                   shared_mfn;
    active_grant_entry_t *act;
    grant_entry_t         sha_copy;
    grant_mapping_t      *maptrack;
    int                   i;

    if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
    {
        DPRINTK("Fault while reading gnttab_dump_table_t.\n");
        return -EFAULT;
    }

    if ( op.dom == DOMID_SELF )
    {
        op.dom = current->domain->domain_id;
    }

    if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
    {
        DPRINTK("Bad domid %d.\n", op.dom);
        (void)put_user(GNTST_bad_domain, &uop->status);
        return 0;
    }

    ASSERT(d->grant_table != NULL);
    gt = d->grant_table;
    (void)put_user(GNTST_okay, &uop->status);

    shared_mfn = virt_to_phys(d->grant_table->shared);

    DPRINTK("Grant table for dom (%hu) MFN (%x)\n",
            op.dom, shared_mfn);

    ASSERT(d->grant_table->active != NULL);
    ASSERT(d->grant_table->shared != NULL);
    ASSERT(d->grant_table->maptrack != NULL);

    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
    {
        sha_copy = gt->shared[i];

        if ( sha_copy.flags )
        {
            DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) "
                    "dom:(%hu) frame:(%x)\n",
                    op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
        }
    }

    spin_lock(&gt->lock);

    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
    {
        act = &gt->active[i];

        if ( act->pin )
        {
            DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) "
                    "dom:(%hu) frame:(%lx)\n",
                    op.dom, i, act->pin, act->domid, act->frame);
        }
    }

    for ( i = 0; i < gt->maptrack_limit; i++ )
    {
        maptrack = &gt->maptrack[i];

        if ( maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK )
        {
            DPRINTK("Grant: dom (%hu) MAP (%d) ref:(%hu) flags:(%x) "
                    "dom:(%hu)\n",
                    op.dom, i,
                    maptrack->ref_and_flags >> MAPTRACK_REF_SHIFT,
                    maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK,
                    maptrack->domid);
        }
    }

    spin_unlock(&gt->lock);

    put_domain(d);
    return 0;
}
#endif
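
/*
 * GNTTABOP_donate: transfer ownership of local page frames to a foreign
 * domain. For each op the page is atomically released from the current
 * domain, the target's grant entry (which must be in the accept-transfer
 * state, see gnttab_prepare_for_transfer) is committed, and the target is
 * then told about its new frame via gnttab_notify_transfer.
 */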
static long
gnttab_donate(gnttab_donate_t *uop, unsigned int count)
{
    struct domain *d = current->domain;
    struct domain *e;
    struct pfn_info *page;
    u32 _d, _nd, x, y;
    int i;
    int result = GNTST_okay;

    for ( i = 0; i < count; i++ )
    {
        gnttab_donate_t *gop = &uop[i];
#if GRANT_DEBUG
        printk("gnttab_donate: i=%d mfn=%lx domid=%d gref=%08x\n",
               i, gop->mfn, gop->domid, gop->handle);
#endif
        page = &frame_table[gop->mfn];

        if ( unlikely(IS_XEN_HEAP_FRAME(page)))
        {
            printk("gnttab_donate: xen heap frame mfn=%lx\n",
                   (unsigned long) gop->mfn);
            gop->status = GNTST_bad_virt_addr;
            continue;
        }

        if ( unlikely(!pfn_valid(page_to_pfn(page))) )
        {
            printk("gnttab_donate: invalid pfn for mfn=%lx\n",
                   (unsigned long) gop->mfn);
            gop->status = GNTST_bad_virt_addr;
            continue;
        }

        if ( unlikely((e = find_domain_by_id(gop->domid)) == NULL) )
        {
            printk("gnttab_donate: can't find domain %d\n", gop->domid);
            gop->status = GNTST_bad_domain;
            continue;
        }

        spin_lock(&d->page_alloc_lock);

        /*
         * The tricky bit: atomically release ownership while
         * there is just one benign reference to the page
         * (PGC_allocated). If that reference disappears then the
         * deallocation routine will safely spin.
         */
        _d  = pickle_domptr(d);
        _nd = page->u.inuse._domain;
        y   = page->count_info;
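        /*
         * The cmpxchg8b treats count_info and the owner word
         * (u.inuse._domain) as one 64-bit operand: if both still hold the
         * values sampled above (x and _d), the owner is cleared to NULL
         * while count_info is left at x; otherwise the values actually
         * seen come back in y and _nd and the loop re-checks or bails out.
         */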
        do {
            x = y;
            if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
                         (1 | PGC_allocated)) || unlikely(_nd != _d)) {
                printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
                       " caf=%08x, taf=%" PRtype_info "\n",
                       (void *) page_to_pfn(page),
                       d, d->domain_id, unpickle_domptr(_nd), x,
                       page->u.inuse.type_info);
                spin_unlock(&d->page_alloc_lock);
                put_domain(e);
                return 0;
            }
            __asm__ __volatile__(
                LOCK_PREFIX "cmpxchg8b %2"
                : "=d" (_nd), "=a" (y),
                  "=m" (*(volatile u64 *)(&page->count_info))
                : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
        } while (unlikely(_nd != _d) || unlikely(y != x));

        /*
         * Unlink from 'd'. At least one reference remains (now
         * anonymous), so no one else is spinning to try to delete
         * this page from 'd'.
         */
        d->tot_pages--;
        list_del(&page->list);

        spin_unlock(&d->page_alloc_lock);

        spin_lock(&e->page_alloc_lock);

        /*
         * Check that 'e' will accept the page and has reservation
         * headroom. Also, a domain mustn't have PGC_allocated
         * pages when it is dying.
         */
        if ( unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
             unlikely(e->tot_pages >= e->max_pages) ||
             unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle)) )
        {
            DPRINTK("gnttab_donate: Transferee has no reservation headroom "
                    "(%d,%d) or provided a bad grant ref (%08x) or "
                    "is dying (%lx)\n",
                    e->tot_pages, e->max_pages, gop->handle, e->domain_flags);
            spin_unlock(&e->page_alloc_lock);
            put_domain(e);
            gop->status = result = GNTST_general_error;
            break;
        }

        /* Okay, add the page to 'e'. */
        if ( unlikely(e->tot_pages++ == 0) )
            get_knownalive_domain(e);
        list_add_tail(&page->list, &e->page_list);
        page_set_owner(page, e);

        spin_unlock(&e->page_alloc_lock);

        /*
         * Transfer is all done: tell the guest about its new page
         * frame.
         */
        gnttab_notify_transfer(e, d, gop->handle, gop->mfn);

        put_domain(e);

        gop->status = GNTST_okay;
    }

    return result;
}

long
do_grant_table_op(
    unsigned int cmd, void *uop, unsigned int count)
{
    long rc;
    struct domain *d = current->domain;

    if ( count > 512 )
        return -EINVAL;

    LOCK_BIGLOCK(d);

    sync_pagetable_state(d);

    rc = -EFAULT;
    switch ( cmd )
    {
    case GNTTABOP_map_grant_ref:
        if ( unlikely(!array_access_ok(
            uop, count, sizeof(gnttab_map_grant_ref_t))) )
            goto out;
        rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
        break;
    case GNTTABOP_unmap_grant_ref:
        if ( unlikely(!array_access_ok(
            uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
            goto out;
        rc = gnttab_unmap_grant_ref(
            (gnttab_unmap_grant_ref_t *)uop, count);
        break;
    case GNTTABOP_setup_table:
        rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
        break;
#if GRANT_DEBUG
    case GNTTABOP_dump_table:
        rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
        break;
#endif
    case GNTTABOP_donate:
        if (unlikely(!array_access_ok(
            uop, count, sizeof(gnttab_donate_t))))
            goto out;
        rc = gnttab_donate(uop, count);
        break;
    default:
        rc = -ENOSYS;
        break;
    }

 out:
    UNLOCK_BIGLOCK(d);

    return rc;
}

int
gnttab_check_unmap(
    struct domain *rd, struct domain *ld, unsigned long frame, int readonly)
{
    /* Called when put_page is invoked on a page belonging to a foreign domain.
     * Instead of decrementing the frame table ref count, locate the grant
     * table entry, if any, and if found, decrement that count.
     * Called a _lot_ at domain creation because pages mapped by priv domains
     * also traverse this.
     */

    /* Note: If the same frame is mapped multiple times, and then one of
     * the ptes is overwritten, which maptrack handle gets invalidated?
     * Advice: Don't do it. Explicitly unmap.
     */

    unsigned int handle, ref, refcount;
    grant_table_t *lgt, *rgt;
    active_grant_entry_t *act;
    grant_mapping_t *map;
    int found = 0;

    lgt = ld->grant_table;

#if GRANT_DEBUG_VERBOSE
    if ( ld->domain_id != 0 )
        DPRINTK("Foreign unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
                rd->domain_id, ld->domain_id, frame, readonly);
#endif

    /* Fast exit if we're not mapping anything using grant tables */
    if ( lgt->map_count == 0 )
        return 0;

    if ( get_domain(rd) == 0 )
    {
        DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n",
                rd->domain_id);
        return 0;
    }

    rgt = rd->grant_table;

    for ( handle = 0; handle < lgt->maptrack_limit; handle++ ) {

        map = &lgt->maptrack[handle];

        if ( map->domid != rd->domain_id )
            continue;

        if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) &&
             ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly)))) {

            ref = (map->ref_and_flags >> MAPTRACK_REF_SHIFT);
            act = &rgt->active[ref];

            spin_lock(&rgt->lock);

            if ( act->frame != frame ) {
                spin_unlock(&rgt->lock);
                continue;
            }

            refcount = act->pin & ( readonly ? GNTPIN_hstr_mask
                                             : GNTPIN_hstw_mask );

            if ( refcount == 0 ) {
                spin_unlock(&rgt->lock);
                continue;
            }

            /* gotcha */
            DPRINTK("Grant unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
                    rd->domain_id, ld->domain_id, frame, readonly);

            if ( readonly )
                act->pin -= GNTPIN_hstr_inc;
            else {
                act->pin -= GNTPIN_hstw_inc;

                /* any more granted writable mappings? */
                if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) {
                    clear_bit(_GTF_writing, &rgt->shared[ref].flags);
                    put_page_type(&frame_table[frame]);
                }
            }

            if ( act->pin == 0 ) {
                clear_bit(_GTF_reading, &rgt->shared[ref].flags);
                put_page(&frame_table[frame]);
            }

            spin_unlock(&rgt->lock);

            clear_bit(GNTMAP_host_map, &map->ref_and_flags);

            if ( !(map->ref_and_flags & GNTMAP_device_map) )
                put_maptrack_handle(lgt, handle);

            found = 1;
            break;
        }
    }
    put_domain(rd);

    return found;
}
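
/*
 * Check that the remote domain has advertised this grant reference as
 * willing to accept a transfer from the calling (donating) domain, i.e.
 * the entry is GTF_accept_transfer and names that domain, and atomically
 * mark it GTF_transfer_committed. Uses the same bounded-retry cmpxchg
 * scheme as __gnttab_activate_grant_ref. Returns 1 on success, 0 on
 * failure.
 */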
int
gnttab_prepare_for_transfer(
    struct domain *rd, struct domain *ld, grant_ref_t ref)
{
    grant_table_t *rgt;
    grant_entry_t *sha;
    domid_t        sdom;
    u16            sflags;
    u32            scombo, prev_scombo;
    int            retries = 0;
    unsigned long  target_pfn;

#if GRANT_DEBUG_VERBOSE
    DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n",
            rd->domain_id, ld->domain_id, ref);
#endif

    if ( unlikely((rgt = rd->grant_table) == NULL) ||
         unlikely(ref >= NR_GRANT_ENTRIES) )
    {
        DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n",
                rd->domain_id, ref);
        return 0;
    }

    spin_lock(&rgt->lock);

    sha = &rgt->shared[ref];

    sflags = sha->flags;
    sdom   = sha->domid;

    for ( ; ; )
    {
        target_pfn = sha->frame;

        if ( unlikely(target_pfn >= max_page ) )
        {
            DPRINTK("Bad pfn (%lx)\n", target_pfn);
            goto fail;
        }

        if ( unlikely(sflags != GTF_accept_transfer) ||
             unlikely(sdom != ld->domain_id) )
        {
            DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
                    sflags, sdom, ld->domain_id);
            goto fail;
        }

        /* Merge two 16-bit values into a 32-bit combined update. */
        /* NB. Endianness! */
        prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;

        /* NB. prev_scombo is updated in place to seen value. */
        if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo,
                                   prev_scombo | GTF_transfer_committed)) )
        {
            DPRINTK("Fault while modifying shared flags and domid.\n");
            goto fail;
        }

        /* Did the combined update work (did we see what we expected?). */
        if ( likely(prev_scombo == scombo) )
            break;

        if ( retries++ == 4 )
        {
            DPRINTK("Shared grant entry is unstable.\n");
            goto fail;
        }

        /* Didn't see what we expected. Split out the seen flags & dom. */
        /* NB. Endianness! */
        sflags = (u16)prev_scombo;
        sdom   = (u16)(prev_scombo >> 16);
    }

    spin_unlock(&rgt->lock);
    return 1;

 fail:
    spin_unlock(&rgt->lock);
    return 0;
}
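
/*
 * Complete a transfer prepared above: bind the transferred machine frame
 * to the pfn named in the shared entry (updating the M2P table, and the
 * P2M where translation is in use), then write the frame back into the
 * entry and set GTF_accept_transfer | GTF_transfer_completed so the
 * receiving guest can claim it.
 */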
void
gnttab_notify_transfer(
    struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame)
{
    grant_entry_t  *sha;
    unsigned long   pfn;

#if GRANT_DEBUG_VERBOSE
    DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
            rd->domain_id, ld->domain_id, ref);
#endif

    sha = &rd->grant_table->shared[ref];

    spin_lock(&rd->grant_table->lock);

    pfn = sha->frame;

    if ( unlikely(pfn >= max_page ) )
        DPRINTK("Bad pfn (%lx)\n", pfn);
    else
    {
        set_pfn_from_mfn(frame, pfn);

        if ( unlikely(shadow_mode_log_dirty(ld)))
            mark_dirty(ld, frame);

        if (shadow_mode_translate(ld))
            set_mfn_from_pfn(pfn, frame);
    }
    sha->frame = __mfn_to_gpfn(rd, frame);
    sha->domid = rd->domain_id;
    wmb();
    sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );

    spin_unlock(&rd->grant_table->lock);

    return;
}
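
/*
 * Allocate and initialise a domain's grant table: the active-entry array,
 * a single maptrack page (grown on demand by __gnttab_map_grant_ref), and
 * NR_GRANT_FRAMES shared frames which are made mappable by the domain.
 */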
int
grant_table_create(
    struct domain *d)
{
    grant_table_t *t;
    int            i;

    if ( (t = xmalloc(grant_table_t)) == NULL )
        goto no_mem;

    /* Simple stuff. */
    memset(t, 0, sizeof(*t));
    spin_lock_init(&t->lock);

    /* Active grant table. */
    if ( (t->active = xmalloc_array(active_grant_entry_t, NR_GRANT_ENTRIES))
         == NULL )
        goto no_mem;
    memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES);

    /* Tracking of mapped foreign frames table */
    if ( (t->maptrack = alloc_xenheap_page()) == NULL )
        goto no_mem;
    t->maptrack_order = 0;
    t->maptrack_limit = PAGE_SIZE / sizeof(grant_mapping_t);
    memset(t->maptrack, 0, PAGE_SIZE);
    for ( i = 0; i < t->maptrack_limit; i++ )
        t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;

    /* Shared grant table. */
    t->shared = alloc_xenheap_pages(ORDER_GRANT_FRAMES);
    if ( t->shared == NULL )
        goto no_mem;
    memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE);

    for ( i = 0; i < NR_GRANT_FRAMES; i++ )
    {
        SHARE_PFN_WITH_DOMAIN(
            virt_to_page((char *)t->shared + (i * PAGE_SIZE)),
            d);
        set_pfn_from_mfn(
            (virt_to_phys(t->shared) >> PAGE_SHIFT) + i,
            INVALID_M2P_ENTRY);
    }

    /* Okay, install the structure. */
    wmb(); /* avoid races with lock-free access to d->grant_table */
    d->grant_table = t;
    return 0;

 no_mem:
    if ( t != NULL )
    {
        xfree(t->active);
        if ( t->maptrack != NULL )
            free_xenheap_page(t->maptrack);
        xfree(t);
    }
    return -ENOMEM;
}
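
/*
 * Walk the maptrack table and drop any remaining device (dev_bus_addr)
 * pins held via this grant table, releasing page references and clearing
 * the shared-entry flags as the last writable/readable pins disappear.
 */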
void
gnttab_release_dev_mappings(grant_table_t *gt)
{
    grant_mapping_t      *map;
    domid_t               dom;
    grant_ref_t           ref;
    u16                   handle;
    struct domain        *ld, *rd;
    unsigned long         frame;
    active_grant_entry_t *act;
    grant_entry_t        *sha;

    ld = current->domain;

    for ( handle = 0; handle < gt->maptrack_limit; handle++ )
    {
        map = &gt->maptrack[handle];

        if ( !(map->ref_and_flags & GNTMAP_device_map) )
            continue;

        dom = map->domid;
        ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;

        DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n",
                handle, ref, map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom);

        if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
             unlikely(ld == rd) )
        {
            if ( rd != NULL )
                put_domain(rd);
            printk(KERN_WARNING "Grant release: No dom%d\n", dom);
            continue;
        }

        act = &rd->grant_table->active[ref];
        sha = &rd->grant_table->shared[ref];

        spin_lock(&rd->grant_table->lock);

        if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) )
        {
            frame = act->frame;

            if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) &&
                 ( (act->pin & GNTPIN_devw_mask) >  0 ) )
            {
                clear_bit(_GTF_writing, &sha->flags);
                put_page_type(&frame_table[frame]);
            }

            map->ref_and_flags &= ~GNTMAP_device_map;
            act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask);
            if ( act->pin == 0 )
            {
                clear_bit(_GTF_reading, &sha->flags);
                map->ref_and_flags = 0;
                put_page(&frame_table[frame]);
            }
        }

        spin_unlock(&rd->grant_table->lock);

        put_domain(rd);
    }
}

void
grant_table_destroy(
    struct domain *d)
{
    grant_table_t *t;

    if ( (t = d->grant_table) != NULL )
    {
        /* Free memory relating to this grant table. */
        d->grant_table = NULL;
        free_xenheap_pages(t->shared, ORDER_GRANT_FRAMES);
        free_xenheap_page(t->maptrack);
        xfree(t->active);
        xfree(t);
    }
}

void
grant_table_init(
    void)
{
    /* Nothing. */
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */