ia64/xen-unstable

view xen/common/grant_table.c @ 6552:a9873d384da4

Merge.
author adsharma@los-vmm.sc.intel.com
date Thu Aug 25 12:24:48 2005 -0700 (2005-08-25)
parents 112d44270733 fa0754a9f64f
children dfaf788ab18c
line source
1 /******************************************************************************
2 * common/grant_table.c
3 *
4 * Mechanism for granting foreign access to page frames, and receiving
5 * page-ownership transfers.
6 *
7 * Copyright (c) 2005 Christopher Clark
8 * Copyright (c) 2004 K A Fraser
9 * Copyright (c) 2005 Andrew Warfield
10 * Modifications by Geoffrey Lefebvre are (c) Intel Research Cambridge
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
27 #define GRANT_DEBUG 0
28 #define GRANT_DEBUG_VERBOSE 0
30 #include <xen/config.h>
31 #include <xen/lib.h>
32 #include <xen/sched.h>
33 #include <xen/shadow.h>
34 #include <xen/mm.h>
35 #include <acm/acm_hooks.h>
37 #if defined(CONFIG_X86_64)
38 #define GRANT_PTE_FLAGS (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
39 #else
40 #define GRANT_PTE_FLAGS (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY)
41 #endif
/*
 * Log a diagnostic, store the error code into the local variable 'rc',
 * and jump to the cleanup label _lbl.
 * NB. Relies on a variable named 'rc' and the label existing in the
 * enclosing function's scope; the label is expected to release any
 * locks held at the point of failure.
 */
#define PIN_FAIL(_lbl, _rc, _f, _a...)          \
    do {                                        \
        DPRINTK( _f, ## _a );                   \
        rc = (_rc);                             \
        goto _lbl;                              \
    } while ( 0 )
50 static inline int
51 get_maptrack_handle(
52 grant_table_t *t)
53 {
54 unsigned int h;
55 if ( unlikely((h = t->maptrack_head) == (t->maptrack_limit - 1)) )
56 return -1;
57 t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT;
58 t->map_count++;
59 return h;
60 }
62 static inline void
63 put_maptrack_handle(
64 grant_table_t *t, int handle)
65 {
66 t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT;
67 t->maptrack_head = handle;
68 t->map_count--;
69 }
71 static int
72 __gnttab_activate_grant_ref(
73 struct domain *mapping_d, /* IN */
74 struct vcpu *mapping_ed,
75 struct domain *granting_d,
76 grant_ref_t ref,
77 u16 dev_hst_ro_flags,
78 u64 addr,
79 unsigned long *pframe ) /* OUT */
80 {
81 domid_t sdom;
82 u16 sflags;
83 active_grant_entry_t *act;
84 grant_entry_t *sha;
85 s16 rc = 1;
86 unsigned long frame = 0;
87 int retries = 0;
89 /*
90 * Objectives of this function:
91 * . Make the record ( granting_d, ref ) active, if not already.
92 * . Update shared grant entry of owner, indicating frame is mapped.
93 * . Increment the owner act->pin reference counts.
94 * . get_page on shared frame if new mapping.
95 * . get_page_type if this is first RW mapping of frame.
96 * . Add PTE to virtual address space of mapping_d, if necessary.
97 * Returns:
98 * . -ve: error
99 * . 1: ok
100 * . 0: ok and TLB invalidate of host_addr needed.
101 *
102 * On success, *pframe contains mfn.
103 */
105 /*
106 * We bound the number of times we retry CMPXCHG on memory locations that
107 * we share with a guest OS. The reason is that the guest can modify that
108 * location at a higher rate than we can read-modify-CMPXCHG, so the guest
109 * could cause us to livelock. There are a few cases where it is valid for
110 * the guest to race our updates (e.g., to change the GTF_readonly flag),
111 * so we allow a few retries before failing.
112 */
114 act = &granting_d->grant_table->active[ref];
115 sha = &granting_d->grant_table->shared[ref];
117 spin_lock(&granting_d->grant_table->lock);
119 if ( act->pin == 0 )
120 {
121 /* CASE 1: Activating a previously inactive entry. */
123 sflags = sha->flags;
124 sdom = sha->domid;
126 /* This loop attempts to set the access (reading/writing) flags
127 * in the grant table entry. It tries a cmpxchg on the field
128 * up to five times, and then fails under the assumption that
129 * the guest is misbehaving. */
130 for ( ; ; )
131 {
132 u32 scombo, prev_scombo, new_scombo;
134 if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
135 unlikely(sdom != mapping_d->domain_id) )
136 PIN_FAIL(unlock_out, GNTST_general_error,
137 "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
138 sflags, sdom, mapping_d->domain_id);
140 /* Merge two 16-bit values into a 32-bit combined update. */
141 /* NB. Endianness! */
142 prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
144 new_scombo = scombo | GTF_reading;
145 if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
146 {
147 new_scombo |= GTF_writing;
148 if ( unlikely(sflags & GTF_readonly) )
149 PIN_FAIL(unlock_out, GNTST_general_error,
150 "Attempt to write-pin a r/o grant entry.\n");
151 }
153 /* NB. prev_scombo is updated in place to seen value. */
154 if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
155 prev_scombo,
156 new_scombo)) )
157 PIN_FAIL(unlock_out, GNTST_general_error,
158 "Fault while modifying shared flags and domid.\n");
160 /* Did the combined update work (did we see what we expected?). */
161 if ( likely(prev_scombo == scombo) )
162 break;
164 if ( retries++ == 4 )
165 PIN_FAIL(unlock_out, GNTST_general_error,
166 "Shared grant entry is unstable.\n");
168 /* Didn't see what we expected. Split out the seen flags & dom. */
169 /* NB. Endianness! */
170 sflags = (u16)prev_scombo;
171 sdom = (u16)(prev_scombo >> 16);
172 }
174 /* rmb(); */ /* not on x86 */
176 frame = __gpfn_to_mfn_foreign(granting_d, sha->frame);
178 if ( unlikely(!pfn_valid(frame)) ||
179 unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ?
180 get_page(&frame_table[frame], granting_d) :
181 get_page_and_type(&frame_table[frame], granting_d,
182 PGT_writable_page))) )
183 {
184 clear_bit(_GTF_writing, &sha->flags);
185 clear_bit(_GTF_reading, &sha->flags);
186 PIN_FAIL(unlock_out, GNTST_general_error,
187 "Could not pin the granted frame (%lx)!\n", frame);
188 }
190 if ( dev_hst_ro_flags & GNTMAP_device_map )
191 act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
192 GNTPIN_devr_inc : GNTPIN_devw_inc;
193 if ( dev_hst_ro_flags & GNTMAP_host_map )
194 act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
195 GNTPIN_hstr_inc : GNTPIN_hstw_inc;
196 act->domid = sdom;
197 act->frame = frame;
198 }
199 else
200 {
201 /* CASE 2: Active modications to an already active entry. */
203 /*
204 * A cheesy check for possible pin-count overflow.
205 * A more accurate check cannot be done with a single comparison.
206 */
207 if ( (act->pin & 0x80808080U) != 0 )
208 PIN_FAIL(unlock_out, ENOSPC,
209 "Risk of counter overflow %08x\n", act->pin);
211 frame = act->frame;
213 if ( !(dev_hst_ro_flags & GNTMAP_readonly) &&
214 !((sflags = sha->flags) & GTF_writing) )
215 {
216 for ( ; ; )
217 {
218 u16 prev_sflags;
220 if ( unlikely(sflags & GTF_readonly) )
221 PIN_FAIL(unlock_out, GNTST_general_error,
222 "Attempt to write-pin a r/o grant entry.\n");
224 prev_sflags = sflags;
226 /* NB. prev_sflags is updated in place to seen value. */
227 if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags,
228 prev_sflags | GTF_writing)) )
229 PIN_FAIL(unlock_out, GNTST_general_error,
230 "Fault while modifying shared flags.\n");
232 if ( likely(prev_sflags == sflags) )
233 break;
235 if ( retries++ == 4 )
236 PIN_FAIL(unlock_out, GNTST_general_error,
237 "Shared grant entry is unstable.\n");
239 sflags = prev_sflags;
240 }
242 if ( unlikely(!get_page_type(&frame_table[frame],
243 PGT_writable_page)) )
244 {
245 clear_bit(_GTF_writing, &sha->flags);
246 PIN_FAIL(unlock_out, GNTST_general_error,
247 "Attempt to write-pin a unwritable page.\n");
248 }
249 }
251 if ( dev_hst_ro_flags & GNTMAP_device_map )
252 act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
253 GNTPIN_devr_inc : GNTPIN_devw_inc;
255 if ( dev_hst_ro_flags & GNTMAP_host_map )
256 act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
257 GNTPIN_hstr_inc : GNTPIN_hstw_inc;
258 }
260 /*
261 * At this point:
262 * act->pin updated to reference count mappings.
263 * sha->flags updated to indicate to granting domain mapping done.
264 * frame contains the mfn.
265 */
267 spin_unlock(&granting_d->grant_table->lock);
269 if ( (addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
270 {
271 /* Write update into the pagetable. */
272 l1_pgentry_t pte;
273 pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
275 if ( (dev_hst_ro_flags & GNTMAP_application_map) )
276 l1e_add_flags(pte,_PAGE_USER);
277 if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
278 l1e_add_flags(pte,_PAGE_RW);
280 if ( dev_hst_ro_flags & GNTMAP_contains_pte )
281 rc = update_grant_pte_mapping(addr, pte, mapping_d, mapping_ed);
282 else
283 rc = update_grant_va_mapping(addr, pte, mapping_d, mapping_ed);
285 /* IMPORTANT: rc indicates the degree of TLB flush that is required.
286 * GNTST_flush_one (1) or GNTST_flush_all (2). This is done in the
287 * outer gnttab_map_grant_ref. */
288 if ( rc < 0 )
289 {
290 /* Failure: undo and abort. */
292 spin_lock(&granting_d->grant_table->lock);
294 if ( dev_hst_ro_flags & GNTMAP_readonly )
295 {
296 act->pin -= GNTPIN_hstr_inc;
297 }
298 else
299 {
300 act->pin -= GNTPIN_hstw_inc;
301 if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
302 {
303 clear_bit(_GTF_writing, &sha->flags);
304 put_page_type(&frame_table[frame]);
305 }
306 }
308 if ( act->pin == 0 )
309 {
310 clear_bit(_GTF_reading, &sha->flags);
311 put_page(&frame_table[frame]);
312 }
314 spin_unlock(&granting_d->grant_table->lock);
315 }
317 }
319 *pframe = frame;
320 return rc;
322 unlock_out:
323 spin_unlock(&granting_d->grant_table->lock);
324 return rc;
325 }
327 /*
328 * Returns 0 if TLB flush / invalidate required by caller.
329 * va will indicate the address to be invalidated.
330 *
331 * addr is _either_ a host virtual address, or the address of the pte to
332 * update, as indicated by the GNTMAP_contains_pte flag.
333 */
334 static int
335 __gnttab_map_grant_ref(
336 gnttab_map_grant_ref_t *uop,
337 unsigned long *va)
338 {
339 domid_t dom;
340 grant_ref_t ref;
341 struct domain *ld, *rd;
342 struct vcpu *led;
343 u16 dev_hst_ro_flags;
344 int handle;
345 u64 addr;
346 unsigned long frame = 0;
347 int rc;
349 led = current;
350 ld = led->domain;
352 /* Bitwise-OR avoids short-circuiting which screws control flow. */
353 if ( unlikely(__get_user(dom, &uop->dom) |
354 __get_user(ref, &uop->ref) |
355 __get_user(addr, &uop->host_addr) |
356 __get_user(dev_hst_ro_flags, &uop->flags)) )
357 {
358 DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
359 return -EFAULT; /* don't set status */
360 }
362 if ( (dev_hst_ro_flags & GNTMAP_host_map) &&
363 ( (addr == 0) ||
364 (!(dev_hst_ro_flags & GNTMAP_contains_pte) &&
365 unlikely(!__addr_ok(addr))) ) )
366 {
367 DPRINTK("Bad virtual address (%"PRIx64") or flags (%"PRIx16").\n",
368 addr, dev_hst_ro_flags);
369 (void)__put_user(GNTST_bad_virt_addr, &uop->handle);
370 return GNTST_bad_gntref;
371 }
373 if ( unlikely(ref >= NR_GRANT_ENTRIES) ||
374 unlikely((dev_hst_ro_flags &
375 (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
376 {
377 DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags);
378 (void)__put_user(GNTST_bad_gntref, &uop->handle);
379 return GNTST_bad_gntref;
380 }
382 if (acm_pre_grant_map_ref(dom)) {
383 (void)__put_user(GNTST_permission_denied, &uop->handle);
384 return GNTST_permission_denied;
385 }
387 if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
388 unlikely(ld == rd) )
389 {
390 if ( rd != NULL )
391 put_domain(rd);
392 DPRINTK("Could not find domain %d\n", dom);
393 (void)__put_user(GNTST_bad_domain, &uop->handle);
394 return GNTST_bad_domain;
395 }
397 /* Get a maptrack handle. */
398 if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
399 {
400 int i;
401 grant_mapping_t *new_mt;
402 grant_table_t *lgt = ld->grant_table;
404 if ( (lgt->maptrack_limit << 1) > MAPTRACK_MAX_ENTRIES )
405 {
406 put_domain(rd);
407 DPRINTK("Maptrack table is at maximum size.\n");
408 (void)__put_user(GNTST_no_device_space, &uop->handle);
409 return GNTST_no_device_space;
410 }
412 /* Grow the maptrack table. */
413 new_mt = alloc_xenheap_pages(lgt->maptrack_order + 1);
414 if ( new_mt == NULL )
415 {
416 put_domain(rd);
417 DPRINTK("No more map handles available.\n");
418 (void)__put_user(GNTST_no_device_space, &uop->handle);
419 return GNTST_no_device_space;
420 }
422 memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order);
423 for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ )
424 new_mt[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
426 free_xenheap_pages(lgt->maptrack, lgt->maptrack_order);
427 lgt->maptrack = new_mt;
428 lgt->maptrack_order += 1;
429 lgt->maptrack_limit <<= 1;
431 DPRINTK("Doubled maptrack size\n");
432 handle = get_maptrack_handle(ld->grant_table);
433 }
435 #if GRANT_DEBUG_VERBOSE
436 DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n",
437 ref, dom, dev_hst_ro_flags);
438 #endif
440 if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref,
441 dev_hst_ro_flags,
442 addr, &frame)))
443 {
444 /*
445 * Only make the maptrack live _after_ writing the pte, in case we
446 * overwrite the same frame number, causing a maptrack walk to find it
447 */
448 ld->grant_table->maptrack[handle].domid = dom;
450 ld->grant_table->maptrack[handle].ref_and_flags
451 = (ref << MAPTRACK_REF_SHIFT) |
452 (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK);
454 (void)__put_user((u64)frame << PAGE_SHIFT, &uop->dev_bus_addr);
456 if ( ( dev_hst_ro_flags & GNTMAP_host_map ) &&
457 !( dev_hst_ro_flags & GNTMAP_contains_pte) )
458 *va = addr;
460 (void)__put_user(handle, &uop->handle);
461 }
462 else
463 {
464 (void)__put_user(rc, &uop->handle);
465 put_maptrack_handle(ld->grant_table, handle);
466 }
468 put_domain(rd);
469 return rc;
470 }
472 static long
473 gnttab_map_grant_ref(
474 gnttab_map_grant_ref_t *uop, unsigned int count)
475 {
476 int i, rc, flush = 0;
477 unsigned long va = 0;
479 for ( i = 0; i < count; i++ )
480 if ( (rc =__gnttab_map_grant_ref(&uop[i], &va)) >= 0 )
481 flush += rc;
483 if ( flush == 1 )
484 flush_tlb_one_mask(current->domain->cpumask, va);
485 else if ( flush != 0 )
486 flush_tlb_mask(current->domain->cpumask);
488 return 0;
489 }
491 static int
492 __gnttab_unmap_grant_ref(
493 gnttab_unmap_grant_ref_t *uop,
494 unsigned long *va)
495 {
496 domid_t dom;
497 grant_ref_t ref;
498 u16 handle;
499 struct domain *ld, *rd;
500 active_grant_entry_t *act;
501 grant_entry_t *sha;
502 grant_mapping_t *map;
503 u16 flags;
504 s16 rc = 1;
505 u64 addr, dev_bus_addr;
506 unsigned long frame;
508 ld = current->domain;
510 /* Bitwise-OR avoids short-circuiting which screws control flow. */
511 if ( unlikely(__get_user(addr, &uop->host_addr) |
512 __get_user(dev_bus_addr, &uop->dev_bus_addr) |
513 __get_user(handle, &uop->handle)) )
514 {
515 DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n");
516 return -EFAULT; /* don't set status */
517 }
519 frame = (unsigned long)(dev_bus_addr >> PAGE_SHIFT);
521 map = &ld->grant_table->maptrack[handle];
523 if ( unlikely(handle >= ld->grant_table->maptrack_limit) ||
524 unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) )
525 {
526 DPRINTK("Bad handle (%d).\n", handle);
527 (void)__put_user(GNTST_bad_handle, &uop->status);
528 return GNTST_bad_handle;
529 }
531 dom = map->domid;
532 ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
533 flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK;
535 if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
536 unlikely(ld == rd) )
537 {
538 if ( rd != NULL )
539 put_domain(rd);
540 DPRINTK("Could not find domain %d\n", dom);
541 (void)__put_user(GNTST_bad_domain, &uop->status);
542 return GNTST_bad_domain;
543 }
545 #if GRANT_DEBUG_VERBOSE
546 DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n",
547 ref, dom, handle);
548 #endif
550 act = &rd->grant_table->active[ref];
551 sha = &rd->grant_table->shared[ref];
553 spin_lock(&rd->grant_table->lock);
555 if ( frame == 0 )
556 {
557 frame = act->frame;
558 }
559 else
560 {
561 if ( unlikely(frame != act->frame) )
562 PIN_FAIL(unmap_out, GNTST_general_error,
563 "Bad frame number doesn't match gntref.\n");
564 if ( flags & GNTMAP_device_map )
565 act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
566 : GNTPIN_devw_inc;
568 map->ref_and_flags &= ~GNTMAP_device_map;
569 (void)__put_user(0, &uop->dev_bus_addr);
571 /* Frame is now unmapped for device access. */
572 }
574 if ( (addr != 0) &&
575 (flags & GNTMAP_host_map) &&
576 ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0))
577 {
578 if ( flags & GNTMAP_contains_pte )
579 {
580 if ( (rc = clear_grant_pte_mapping(addr, frame, ld)) < 0 )
581 goto unmap_out;
582 }
583 else
584 {
585 if ( (rc = clear_grant_va_mapping(addr, frame)) < 0 )
586 goto unmap_out;
587 }
589 map->ref_and_flags &= ~GNTMAP_host_map;
591 act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
592 : GNTPIN_hstw_inc;
594 rc = 0;
595 if ( !( flags & GNTMAP_contains_pte) )
596 *va = addr;
597 }
599 if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0)
600 {
601 map->ref_and_flags = 0;
602 put_maptrack_handle(ld->grant_table, handle);
603 }
605 /* If just unmapped a writable mapping, mark as dirtied */
606 if ( unlikely(shadow_mode_log_dirty(rd)) &&
607 !( flags & GNTMAP_readonly ) )
608 mark_dirty(rd, frame);
610 /* If the last writable mapping has been removed, put_page_type */
611 if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) &&
612 ( !( flags & GNTMAP_readonly ) ) )
613 {
614 clear_bit(_GTF_writing, &sha->flags);
615 put_page_type(&frame_table[frame]);
616 }
618 if ( act->pin == 0 )
619 {
620 act->frame = 0xdeadbeef;
621 clear_bit(_GTF_reading, &sha->flags);
622 put_page(&frame_table[frame]);
623 }
625 unmap_out:
626 (void)__put_user(rc, &uop->status);
627 spin_unlock(&rd->grant_table->lock);
628 put_domain(rd);
629 return rc;
630 }
632 static long
633 gnttab_unmap_grant_ref(
634 gnttab_unmap_grant_ref_t *uop, unsigned int count)
635 {
636 int i, flush = 0;
637 unsigned long va = 0;
639 for ( i = 0; i < count; i++ )
640 if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0 )
641 flush++;
643 if ( flush == 1 )
644 flush_tlb_one_mask(current->domain->cpumask, va);
645 else if ( flush != 0 )
646 flush_tlb_mask(current->domain->cpumask);
648 return 0;
649 }
651 static long
652 gnttab_setup_table(
653 gnttab_setup_table_t *uop, unsigned int count)
654 {
655 gnttab_setup_table_t op;
656 struct domain *d;
657 int i;
659 if ( count != 1 )
660 return -EINVAL;
662 if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
663 {
664 DPRINTK("Fault while reading gnttab_setup_table_t.\n");
665 return -EFAULT;
666 }
668 if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) )
669 {
670 DPRINTK("Xen only supports up to %d grant-table frames per domain.\n",
671 NR_GRANT_FRAMES);
672 (void)put_user(GNTST_general_error, &uop->status);
673 return 0;
674 }
676 if ( op.dom == DOMID_SELF )
677 {
678 op.dom = current->domain->domain_id;
679 }
680 else if ( unlikely(!IS_PRIV(current->domain)) )
681 {
682 (void)put_user(GNTST_permission_denied, &uop->status);
683 return 0;
684 }
686 if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
687 {
688 DPRINTK("Bad domid %d.\n", op.dom);
689 (void)put_user(GNTST_bad_domain, &uop->status);
690 return 0;
691 }
693 if ( op.nr_frames <= NR_GRANT_FRAMES )
694 {
695 ASSERT(d->grant_table != NULL);
696 (void)put_user(GNTST_okay, &uop->status);
697 for ( i = 0; i < op.nr_frames; i++ )
698 (void)put_user(
699 (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i,
700 &uop->frame_list[i]);
701 }
703 put_domain(d);
704 return 0;
705 }
#if GRANT_DEBUG
/*
 * Debug-only GNTTABOP_dump_table: print every non-empty shared entry,
 * every pinned active entry, and every live maptrack entry for a domain.
 * Shared entries are snapshotted without the lock (they are guest-written
 * anyway); active and maptrack scans hold the grant-table lock.
 */
static int
gnttab_dump_table(gnttab_dump_table_t *uop)
{
    grant_table_t        *gt;
    gnttab_dump_table_t   op;
    struct domain        *d;
    u32                   shared_mfn;
    active_grant_entry_t *act;
    grant_entry_t         sha_copy;
    grant_mapping_t      *maptrack;
    int                   i;

    if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
    {
        DPRINTK("Fault while reading gnttab_dump_table_t.\n");
        return -EFAULT;
    }

    if ( op.dom == DOMID_SELF )
    {
        op.dom = current->domain->domain_id;
    }

    if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
    {
        DPRINTK("Bad domid %d.\n", op.dom);
        (void)put_user(GNTST_bad_domain, &uop->status);
        return 0;
    }

    ASSERT(d->grant_table != NULL);
    gt = d->grant_table;
    (void)put_user(GNTST_okay, &uop->status);

    shared_mfn = virt_to_phys(d->grant_table->shared);

    DPRINTK("Grant table for dom (%hu) MFN (%x)\n",
            op.dom, shared_mfn);

    ASSERT(d->grant_table->active != NULL);
    ASSERT(d->grant_table->shared != NULL);
    ASSERT(d->grant_table->maptrack != NULL);

    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
    {
        sha_copy = gt->shared[i];

        if ( sha_copy.flags )
        {
            DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) "
                    "dom:(%hu) frame:(%x)\n",
                    op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
        }
    }

    spin_lock(&gt->lock);

    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
    {
        act = &gt->active[i];

        if ( act->pin )
        {
            DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) "
                    "dom:(%hu) frame:(%lx)\n",
                    op.dom, i, act->pin, act->domid, act->frame);
        }
    }

    for ( i = 0; i < gt->maptrack_limit; i++ )
    {
        maptrack = &gt->maptrack[i];

        if ( maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK )
        {
            DPRINTK("Grant: dom (%hu) MAP (%d) ref:(%hu) flags:(%x) "
                    "dom:(%hu)\n",
                    op.dom, i,
                    maptrack->ref_and_flags >> MAPTRACK_REF_SHIFT,
                    maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK,
                    maptrack->domid);
        }
    }

    spin_unlock(&gt->lock);

    put_domain(d);
    return 0;
}
#endif
800 static long
801 gnttab_donate(gnttab_donate_t *uop, unsigned int count)
802 {
803 struct domain *d = current->domain;
804 struct domain *e;
805 struct pfn_info *page;
806 u32 _d, _nd, x, y;
807 int i;
808 int result = GNTST_okay;
810 for (i = 0; i < count; i++) {
811 gnttab_donate_t *gop = &uop[i];
812 #if GRANT_DEBUG
813 printk("gnttab_donate: i=%d mfn=%lx domid=%d gref=%08x\n",
814 i, gop->mfn, gop->domid, gop->handle);
815 #endif
816 page = &frame_table[gop->mfn];
818 if (unlikely(IS_XEN_HEAP_FRAME(page))) {
819 printk("gnttab_donate: xen heap frame mfn=%lx\n",
820 (unsigned long) gop->mfn);
821 gop->status = GNTST_bad_virt_addr;
822 continue;
823 }
824 if (unlikely(!pfn_valid(page_to_pfn(page)))) {
825 printk("gnttab_donate: invalid pfn for mfn=%lx\n",
826 (unsigned long) gop->mfn);
827 gop->status = GNTST_bad_virt_addr;
828 continue;
829 }
830 if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
831 printk("gnttab_donate: can't find domain %d\n", gop->domid);
832 gop->status = GNTST_bad_domain;
833 continue;
834 }
836 spin_lock(&d->page_alloc_lock);
838 /*
839 * The tricky bit: atomically release ownership while
840 * there is just one benign reference to the page
841 * (PGC_allocated). If that reference disappears then the
842 * deallocation routine will safely spin.
843 */
844 _d = pickle_domptr(d);
845 _nd = page->u.inuse._domain;
846 y = page->count_info;
847 do {
848 x = y;
849 if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
850 (1 | PGC_allocated)) || unlikely(_nd != _d)) {
851 printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
852 " caf=%08x, taf=%" PRtype_info "\n",
853 (void *) page_to_pfn(page),
854 d, d->domain_id, unpickle_domptr(_nd), x,
855 page->u.inuse.type_info);
856 spin_unlock(&d->page_alloc_lock);
857 put_domain(e);
858 return 0;
859 }
860 __asm__ __volatile__(
861 LOCK_PREFIX "cmpxchg8b %2"
862 : "=d" (_nd), "=a" (y),
863 "=m" (*(volatile u64 *)(&page->count_info))
864 : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
865 } while (unlikely(_nd != _d) || unlikely(y != x));
867 /*
868 * Unlink from 'd'. At least one reference remains (now
869 * anonymous), so noone else is spinning to try to delete
870 * this page from 'd'.
871 */
872 d->tot_pages--;
873 list_del(&page->list);
875 spin_unlock(&d->page_alloc_lock);
877 spin_lock(&e->page_alloc_lock);
879 /*
880 * Check that 'e' will accept the page and has reservation
881 * headroom. Also, a domain mustn't have PGC_allocated
882 * pages when it is dying.
883 */
884 #ifdef GRANT_DEBUG
885 if (unlikely(e->tot_pages >= e->max_pages)) {
886 printk("gnttab_dontate: no headroom tot_pages=%d max_pages=%d\n",
887 e->tot_pages, e->max_pages);
888 spin_unlock(&e->page_alloc_lock);
889 put_domain(e);
890 result = GNTST_general_error;
891 break;
892 }
893 if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags))) {
894 printk("gnttab_donate: target domain is dying\n");
895 spin_unlock(&e->page_alloc_lock);
896 put_domain(e);
897 result = GNTST_general_error;
898 break;
899 }
900 if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
901 printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
902 spin_unlock(&e->page_alloc_lock);
903 put_domain(e);
904 result = GNTST_general_error;
905 break;
906 }
907 #else
908 ASSERT(e->tot_pages <= e->max_pages);
909 if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
910 unlikely(e->tot_pages == e->max_pages) ||
911 unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
912 printk("gnttab_donate: Transferee has no reservation headroom (%d,"
913 "%d) or provided a bad grant ref (%08x) or is dying (%p)\n",
914 e->tot_pages, e->max_pages, gop->handle, e->d_flags);
915 spin_unlock(&e->page_alloc_lock);
916 put_domain(e);
917 result = GNTST_general_error;
918 break;
919 }
920 #endif
921 /* Okay, add the page to 'e'. */
922 if (unlikely(e->tot_pages++ == 0)) {
923 get_knownalive_domain(e);
924 }
925 list_add_tail(&page->list, &e->page_list);
926 page_set_owner(page, e);
928 spin_unlock(&e->page_alloc_lock);
930 /*
931 * Transfer is all done: tell the guest about its new page
932 * frame.
933 */
934 gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
936 put_domain(e);
938 gop->status = GNTST_okay;
939 }
940 return result;
941 }
943 long
944 do_grant_table_op(
945 unsigned int cmd, void *uop, unsigned int count)
946 {
947 long rc;
948 struct domain *d = current->domain;
950 if ( count > 512 )
951 return -EINVAL;
953 LOCK_BIGLOCK(d);
955 sync_pagetable_state(d);
957 rc = -EFAULT;
958 switch ( cmd )
959 {
960 case GNTTABOP_map_grant_ref:
961 if ( unlikely(!array_access_ok(
962 uop, count, sizeof(gnttab_map_grant_ref_t))) )
963 goto out;
964 rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
965 break;
966 case GNTTABOP_unmap_grant_ref:
967 if ( unlikely(!array_access_ok(
968 uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
969 goto out;
970 rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop,
971 count);
972 break;
973 case GNTTABOP_setup_table:
974 rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
975 break;
976 #if GRANT_DEBUG
977 case GNTTABOP_dump_table:
978 rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
979 break;
980 #endif
981 case GNTTABOP_donate:
982 if (unlikely(!array_access_ok(uop, count,
983 sizeof(gnttab_donate_t))))
984 goto out;
985 rc = gnttab_donate(uop, count);
986 break;
987 default:
988 rc = -ENOSYS;
989 break;
990 }
992 out:
993 UNLOCK_BIGLOCK(d);
995 return rc;
996 }
998 int
999 gnttab_check_unmap(
1000 struct domain *rd, struct domain *ld, unsigned long frame, int readonly)
1002 /* Called when put_page is invoked on a page belonging to a foreign domain.
1003 * Instead of decrementing the frame table ref count, locate the grant
1004 * table entry, if any, and if found, decrement that count.
1005 * Called a _lot_ at domain creation because pages mapped by priv domains
1006 * also traverse this.
1007 */
1009 /* Note: If the same frame is mapped multiple times, and then one of
1010 * the ptes is overwritten, which maptrack handle gets invalidated?
1011 * Advice: Don't do it. Explicitly unmap.
1012 */
1014 unsigned int handle, ref, refcount;
1015 grant_table_t *lgt, *rgt;
1016 active_grant_entry_t *act;
1017 grant_mapping_t *map;
1018 int found = 0;
1020 lgt = ld->grant_table;
1022 #if GRANT_DEBUG_VERBOSE
1023 if ( ld->domain_ id != 0 ) {
1024 DPRINTK("Foreign unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
1025 rd->domain_id, ld->domain_id, frame, readonly);
1027 #endif
1029 /* Fast exit if we're not mapping anything using grant tables */
1030 if ( lgt->map_count == 0 )
1031 return 0;
1033 if ( get_domain(rd) == 0 ) {
1034 DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n",
1035 rd->domain_id);
1036 return 0;
1039 rgt = rd->grant_table;
1041 for ( handle = 0; handle < lgt->maptrack_limit; handle++ ) {
1043 map = &lgt->maptrack[handle];
1045 if ( map->domid != rd->domain_id )
1046 continue;
1048 if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) &&
1049 ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly)))) {
1051 ref = (map->ref_and_flags >> MAPTRACK_REF_SHIFT);
1052 act = &rgt->active[ref];
1054 spin_lock(&rgt->lock);
1056 if ( act->frame != frame ) {
1057 spin_unlock(&rgt->lock);
1058 continue;
1061 refcount = act->pin & ( readonly ? GNTPIN_hstr_mask
1062 : GNTPIN_hstw_mask );
1064 if ( refcount == 0 ) {
1065 spin_unlock(&rgt->lock);
1066 continue;
1069 /* gotcha */
1070 DPRINTK("Grant unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
1071 rd->domain_id, ld->domain_id, frame, readonly);
1073 if ( readonly )
1074 act->pin -= GNTPIN_hstr_inc;
1075 else {
1076 act->pin -= GNTPIN_hstw_inc;
1078 /* any more granted writable mappings? */
1079 if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) {
1080 clear_bit(_GTF_writing, &rgt->shared[ref].flags);
1081 put_page_type(&frame_table[frame]);
1085 if ( act->pin == 0 ) {
1086 clear_bit(_GTF_reading, &rgt->shared[ref].flags);
1087 put_page(&frame_table[frame]);
1090 spin_unlock(&rgt->lock);
1092 clear_bit(GNTMAP_host_map, &map->ref_and_flags);
1094 if ( !(map->ref_and_flags & GNTMAP_device_map) )
1095 put_maptrack_handle(lgt, handle);
1097 found = 1;
1098 break;
1101 put_domain(rd);
1103 return found;
/*
 * gnttab_prepare_for_transfer: check that the remote domain rd has published
 * a grant entry <ref> accepting a page transfer from ld, and atomically mark
 * that entry as committed (GTF_transfer_committed) so no other transfer can
 * race in. Returns 1 on success, 0 on any failure (bad ref, wrong flags/domid,
 * unstable entry, or fault on the guest-shared page).
 * Called with no locks held; takes and releases rd's grant-table lock.
 */
int
gnttab_prepare_for_transfer(
    struct domain *rd, struct domain *ld, grant_ref_t ref)
{
    grant_table_t *rgt;
    grant_entry_t *sha;
    domid_t        sdom;
    u16            sflags;
    u32            scombo, prev_scombo;
    int            retries = 0;
    unsigned long  target_pfn;

#if GRANT_DEBUG_VERBOSE
    DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n",
            rd->domain_id, ld->domain_id, ref);
#endif

    if ( unlikely((rgt = rd->grant_table) == NULL) ||
         unlikely(ref >= NR_GRANT_ENTRIES) )
    {
        DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n",
                rd->domain_id, ref);
        return 0;
    }

    spin_lock(&rgt->lock);

    sha = &rgt->shared[ref];

    /* Snapshot flags+domid; the guest can modify them concurrently, so the
     * cmpxchg below re-validates that this snapshot is still current. */
    sflags = sha->flags;
    sdom   = sha->domid;

    for ( ; ; )
    {
        target_pfn = sha->frame;

        if ( unlikely(target_pfn >= max_page ) )
        {
            DPRINTK("Bad pfn (%lx)\n", target_pfn);
            goto fail;
        }

        /* Entry must be exactly GTF_accept_transfer (no other flags set)
         * and must name the local domain as the intended donor. */
        if ( unlikely(sflags != GTF_accept_transfer) ||
             unlikely(sdom != ld->domain_id) )
        {
            DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
                    sflags, sdom, ld->domain_id);
            goto fail;
        }

        /* Merge two 16-bit values into a 32-bit combined update. */
        /* NB. Endianness! */
        prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;

        /* NB. prev_scombo is updated in place to seen value. */
        if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo,
                                   prev_scombo | GTF_transfer_committed)) )
        {
            /* cmpxchg_user faulted on the guest-mapped shared page. */
            DPRINTK("Fault while modifying shared flags and domid.\n");
            goto fail;
        }

        /* Did the combined update work (did we see what we expected?). */
        if ( likely(prev_scombo == scombo) )
            break;

        /* Guest keeps changing the entry under us: give up after 5 tries. */
        if ( retries++ == 4 )
        {
            DPRINTK("Shared grant entry is unstable.\n");
            goto fail;
        }

        /* Didn't see what we expected. Split out the seen flags & dom. */
        /* NB. Endianness! */
        sflags = (u16)prev_scombo;
        sdom   = (u16)(prev_scombo >> 16);
    }

    spin_unlock(&rgt->lock);
    return 1;

 fail:
    spin_unlock(&rgt->lock);
    return 0;
}
/*
 * gnttab_notify_transfer: complete a page transfer into remote domain rd.
 * <frame> is the machine frame being donated by ld; grant entry <ref> in rd
 * (already committed by gnttab_prepare_for_transfer) is updated to name the
 * transferred frame and flagged GTF_transfer_completed so rd can claim it.
 */
void
gnttab_notify_transfer(
    struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame)
{
    grant_entry_t  *sha;
    unsigned long   pfn;

#if GRANT_DEBUG_VERBOSE
    DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
            rd->domain_id, ld->domain_id, ref);
#endif

    sha = &rd->grant_table->shared[ref];

    spin_lock(&rd->grant_table->lock);

    /* pfn is the pseudo-physical address rd asked the page to appear at. */
    pfn = sha->frame;

    if ( unlikely(pfn >= max_page ) )
        DPRINTK("Bad pfn (%lx)\n", pfn);
    else
    {
        machine_to_phys_mapping[frame] = pfn;

        if ( unlikely(shadow_mode_log_dirty(ld)))
             mark_dirty(ld, frame);

        if (shadow_mode_translate(ld))
            __phys_to_machine_mapping[pfn] = frame;
    }
    sha->frame = __mfn_to_gpfn(rd, frame);
    sha->domid = rd->domain_id;
    /* Frame/domid must be visible before the completion flag is raised. */
    wmb();
    sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );

    spin_unlock(&rd->grant_table->lock);

    return;
}
1232 int
1233 grant_table_create(
1234 struct domain *d)
1236 grant_table_t *t;
1237 int i;
1239 if ( (t = xmalloc(grant_table_t)) == NULL )
1240 goto no_mem;
1242 /* Simple stuff. */
1243 memset(t, 0, sizeof(*t));
1244 spin_lock_init(&t->lock);
1246 /* Active grant table. */
1247 if ( (t->active = xmalloc_array(active_grant_entry_t, NR_GRANT_ENTRIES))
1248 == NULL )
1249 goto no_mem;
1250 memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES);
1252 /* Tracking of mapped foreign frames table */
1253 if ( (t->maptrack = alloc_xenheap_page()) == NULL )
1254 goto no_mem;
1255 t->maptrack_order = 0;
1256 t->maptrack_limit = PAGE_SIZE / sizeof(grant_mapping_t);
1257 memset(t->maptrack, 0, PAGE_SIZE);
1258 for ( i = 0; i < t->maptrack_limit; i++ )
1259 t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
1261 /* Shared grant table. */
1262 t->shared = alloc_xenheap_pages(ORDER_GRANT_FRAMES);
1263 if ( t->shared == NULL )
1264 goto no_mem;
1265 memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE);
1267 for ( i = 0; i < NR_GRANT_FRAMES; i++ )
1269 SHARE_PFN_WITH_DOMAIN(
1270 virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d);
1271 machine_to_phys_mapping[(virt_to_phys(t->shared) >> PAGE_SHIFT) + i] =
1272 INVALID_M2P_ENTRY;
1275 /* Okay, install the structure. */
1276 wmb(); /* avoid races with lock-free access to d->grant_table */
1277 d->grant_table = t;
1278 return 0;
1280 no_mem:
1281 if ( t != NULL )
1283 xfree(t->active);
1284 if ( t->maptrack != NULL )
1285 free_xenheap_page(t->maptrack);
1286 xfree(t);
1288 return -ENOMEM;
/*
 * gnttab_release_dev_mappings: drop every device (DMA) mapping recorded in
 * maptrack table gt, releasing the corresponding pins on the granting
 * domains' frames. Used when tearing down the local domain's mappings.
 * Host (CPU) mappings are left intact; a maptrack entry is only fully
 * recycled once no pins of any kind remain on the active entry.
 */
void
gnttab_release_dev_mappings(grant_table_t *gt)
{
    grant_mapping_t        *map;
    domid_t                 dom;
    grant_ref_t             ref;
    u16                     handle;
    struct domain          *ld, *rd;
    unsigned long           frame;
    active_grant_entry_t   *act;
    grant_entry_t          *sha;

    ld = current->domain;

    for ( handle = 0; handle < gt->maptrack_limit; handle++ )
    {
        map = &gt->maptrack[handle];

        if ( map->ref_and_flags & GNTMAP_device_map )
        {
            dom = map->domid;
            ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;

            DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n",
                    handle, ref,
                    map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom);

            /* Granting domain must still exist and must not be ourselves. */
            if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
                 unlikely(ld == rd) )
            {
                if ( rd != NULL )
                    put_domain(rd);

                printk(KERN_WARNING "Grant release: No dom%d\n", dom);
                continue;
            }

            act = &rd->grant_table->active[ref];
            sha = &rd->grant_table->shared[ref];

            spin_lock(&rd->grant_table->lock);

            if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) )
            {
                frame = act->frame;

                /* Dropping the last writable pin (no host-write pins
                 * remain): clear the writing flag and the type ref. */
                if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) &&
                     ( (act->pin & GNTPIN_devw_mask) > 0 ) )
                {
                    clear_bit(_GTF_writing, &sha->flags);
                    put_page_type(&frame_table[frame]);
                }

                act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask);

                if ( act->pin == 0 )
                {
                    /* No pins left at all: entry fully released. */
                    clear_bit(_GTF_reading, &sha->flags);
                    map->ref_and_flags = 0;
                    put_page(&frame_table[frame]);
                }
                else
                    /* Host mapping still pinned: keep the maptrack entry
                     * but record that the device mapping is gone. */
                    map->ref_and_flags &= ~GNTMAP_device_map;
            }

            spin_unlock(&rd->grant_table->lock);

            put_domain(rd);
        }
    }
}
1364 void
1365 grant_table_destroy(
1366 struct domain *d)
1368 grant_table_t *t;
1370 if ( (t = d->grant_table) != NULL )
1372 /* Free memory relating to this grant table. */
1373 d->grant_table = NULL;
1374 free_xenheap_pages(t->shared, ORDER_GRANT_FRAMES);
1375 free_xenheap_page(t->maptrack);
1376 xfree(t->active);
1377 xfree(t);
/*
 * grant_table_init: system-wide grant-table initialisation hook.
 * Intentionally empty — all state is per-domain and is set up by
 * grant_table_create().
 */
void
grant_table_init(
    void)
{
    /* Nothing to do. */
}
1388 /*
1389 * Local variables:
1390 * mode: C
1391 * c-set-style: "BSD"
1392 * c-basic-offset: 4
1393 * tab-width: 4
1394 * indent-tabs-mode: nil
1395 * End:
1396 */