ia64/xen-unstable

xen/arch/ia64/xen/vhpt.c @ 16785:af3550f53874

[IA64] domheap: Don't pin xenheap down. Now it's unnecessary.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Alex Williamson <alex.williamson@hp.com>
date Thu Jan 17 12:05:43 2008 -0700 (2008-01-17)
parents 09cd682ac68e
children 564fa97594a6
/*
 * Initialize VHPT support.
 *
 * Copyright (C) 2004 Hewlett-Packard Co
 *      Dan Magenheimer <dan.magenheimer@hp.com>
 *
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *                    per vcpu vhpt support
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/vhpt.h>
#include <asm/vcpu.h>
#include <asm/vcpumask.h>
#include <asm/vmmu.h>

DEFINE_PER_CPU(unsigned long, vhpt_paddr);
DEFINE_PER_CPU(unsigned long, vhpt_pend);
#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
DEFINE_PER_CPU(volatile u32, vhpt_tlbflush_timestamp);
#endif

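/*
 * Each long-format VHPT entry (struct vhpt_lf_entry) is 32 bytes, which is
 * why the helpers below derive the entry count as 1 << (vhpt_size_log2 - 5).
 */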
static void
__vhpt_flush(unsigned long vhpt_maddr, unsigned long vhpt_size_log2)
{
        struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
        unsigned long num_entries = 1 << (vhpt_size_log2 - 5);
        int i;

        for (i = 0; i < num_entries; i++, v++)
                v->ti_tag = INVALID_TI_TAG;
}

void
local_vhpt_flush(void)
{
        /* increment flush clock before flush */
        u32 flush_time = tlbflush_clock_inc_and_return();
        __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr), VHPT_SIZE_LOG2);
        /* this must be after flush */
        tlbflush_update_time(&__get_cpu_var(vhpt_tlbflush_timestamp),
                             flush_time);
        perfc_incr(local_vhpt_flush);
}

void
vcpu_vhpt_flush(struct vcpu* v)
{
        unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
        if (HAS_PERVCPU_VHPT(v->domain))
                vhpt_size_log2 = v->arch.pta.size;
#endif
        __vhpt_flush(vcpu_vhpt_maddr(v), vhpt_size_log2);
        perfc_incr(vcpu_vhpt_flush);
}

static void
vhpt_erase(unsigned long vhpt_maddr, unsigned long vhpt_size_log2)
{
        struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
        unsigned long num_entries = 1 << (vhpt_size_log2 - 5);
        int i;

        for (i = 0; i < num_entries; i++, v++) {
                v->itir = 0;
                v->CChain = 0;
                v->page_flags = 0;
                v->ti_tag = INVALID_TI_TAG;
        }
        // initialize cache too???
}

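/*
 * Insertion protocol: the tag is invalidated first and rewritten last, so
 * a (possibly speculative) hardware VHPT walk never sees a half-updated
 * entry as valid.
 */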
void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long itir)
{
        struct vhpt_lf_entry *vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
        unsigned long tag = ia64_ttag (vadr);

        /* Even though the VHPT is per VCPU, we still need to disable the
         * entry first, because the processor may do speculative VHPT
         * walks. */
        vlfe->ti_tag = INVALID_TI_TAG;
        wmb();
        vlfe->itir = itir;
        vlfe->page_flags = pte | _PAGE_P;
        *(volatile unsigned long*)&vlfe->ti_tag = tag;
}

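/*
 * Insert a guest translation whose page size (itir.ps) may be larger than
 * the VHPT page size: the range is covered with one VHPT entry per
 * VHPT-page-sized chunk, all describing the same large mapping, so a hash
 * lookup anywhere in the range finds it.
 */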
void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte,
                          unsigned long itir)
{
        unsigned char ps = current->arch.vhpt_pg_shift;
        ia64_itir_t _itir = {.itir = itir};
        unsigned long mask = (1L << _itir.ps) - 1;
        int i;

        if (_itir.ps - ps > 10 && !running_on_sim) {
                // if this happens, we may want to revisit this algorithm
                panic("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
        }
        if (_itir.ps - ps > 2) {
                // FIXME: Should add counter here to see how often this
                // happens (e.g. for 16MB pages!) and determine if it
                // is a performance problem. On a quick look, it takes
                // about 39000 instrs for a 16MB page and it seems to occur
                // only a few times/second, so OK for now.
                // An alternate solution would be to just insert the one
                // 16KB in the vhpt (but with the full mapping)?
                //printk("vhpt_multiple_insert: logps-PAGE_SHIFT==%d,"
                //       "va=%p, pa=%p, pa-masked=%p\n",
                //       logps-PAGE_SHIFT,vaddr,pte&_PFN_MASK,
                //       (pte&_PFN_MASK)&~mask);
        }
        vaddr &= ~mask;
        pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
        for (i = 1L << (_itir.ps - ps); i > 0; i--) {
                vhpt_insert(vaddr, pte, _itir.itir);
                vaddr += (1L << ps);
        }
}

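/*
 * The VHPT base programmed into PTA must be naturally aligned to the VHPT
 * size, hence the alignment check after allocation; the buddy allocator is
 * expected to return an order-aligned block.
 */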
void __init vhpt_init(void)
{
        unsigned long paddr;
        struct page_info *page;
#if !VHPT_ENABLED
        return;
#endif
        /* This allocation only holds if the vhpt table is shared by all
         * domains. Otherwise a new vhpt table would have to be allocated
         * from the domain heap when each domain is created. We assume the
         * xen buddy allocator provides naturally aligned pages for this
         * order.
         */
        page = alloc_domheap_pages(NULL, VHPT_SIZE_LOG2 - PAGE_SHIFT, 0);
        if (!page)
                panic("vhpt_init: can't allocate VHPT!\n");
        paddr = page_to_maddr(page);
        if (paddr & ((1 << VHPT_SIZE_LOG2) - 1))
                panic("vhpt_init: bad VHPT alignment!\n");
        __get_cpu_var(vhpt_paddr) = paddr;
        __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
        printk(XENLOG_DEBUG "vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
               paddr, __get_cpu_var(vhpt_pend));
        vhpt_erase(paddr, VHPT_SIZE_LOG2);
        // we don't enable VHPT here.
        // context_switch() or schedule_tail() does it.
}

#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
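/*
 * Per-vcpu VHPT: each vcpu gets its own table so that flushes can target a
 * single vcpu instead of a whole physical cpu.  The pta fields mirror the
 * architected PTA register: ve enables the walker, vf selects the long
 * format, and base holds the table's virtual address shifted down by 15
 * (the base field starts at bit 15).
 */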
int
pervcpu_vhpt_alloc(struct vcpu *v)
{
        unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;

        if (v->domain->arch.vhpt_size_log2 > 0)
                vhpt_size_log2 =
                        canonicalize_vhpt_size(v->domain->arch.vhpt_size_log2);
        printk(XENLOG_DEBUG "%s vhpt_size_log2=%ld\n",
               __func__, vhpt_size_log2);
        v->arch.vhpt_entries =
                (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
        v->arch.vhpt_page =
                alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0);
        if (!v->arch.vhpt_page)
                return -ENOMEM;

        v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page);
        if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1))
                panic("pervcpu_vhpt_init: bad VHPT alignment!\n");

        v->arch.pta.val = 0; // to zero reserved bits
        v->arch.pta.ve = 1; // enable vhpt
        v->arch.pta.size = vhpt_size_log2;
        v->arch.pta.vf = 1; // long format
        v->arch.pta.base = __va_ul(v->arch.vhpt_maddr) >> 15;

        vhpt_erase(v->arch.vhpt_maddr, vhpt_size_log2);
        smp_mb(); // per vcpu vhpt may be used by another physical cpu.
        return 0;
}

void
pervcpu_vhpt_free(struct vcpu *v)
{
        if (likely(v->arch.vhpt_page != NULL))
                free_domheap_pages(v->arch.vhpt_page,
                                   v->arch.pta.size - PAGE_SHIFT);
}
#endif

void
domain_purge_swtc_entries(struct domain *d)
{
        struct vcpu* v;
        for_each_vcpu(d, v) {
                if (!v->is_initialised)
                        continue;

                /* Purge TC entries.
                   FIXME: clear only if match. */
                vcpu_purge_tr_entry(&PSCBX(v,dtlb));
                vcpu_purge_tr_entry(&PSCBX(v,itlb));
        }
}

void
domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d,
                                          vcpumask_t vcpu_dirty_mask)
{
        int vcpu;

        for_each_vcpu_mask(vcpu, vcpu_dirty_mask) {
                struct vcpu* v = d->vcpu[vcpu];
                if (!v->is_initialised)
                        continue;

                /* Purge TC entries.
                   FIXME: clear only if match. */
                vcpu_purge_tr_entry(&PSCBX(v, dtlb));
                vcpu_purge_tr_entry(&PSCBX(v, itlb));
        }
}

// SMP: we can't assume v == current; the vcpu might move to another
// physical cpu, so a memory barrier is necessary.
// If we can guarantee that the vcpu runs only on this physical cpu
// (e.g. vcpu == current), smp_mb() is unnecessary.
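/*
 * Flush everything TLB-related for one vcpu: for a VT-i (VMX) domain the
 * whole vTLB/thash is purged; for a PV domain the software dtlb/itlb
 * entries, then the (per-vcpu or per-cpu) VHPT, then the machine TLB are
 * flushed, in that order.
 */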
void vcpu_flush_vtlb_all(struct vcpu *v)
{
        if (VMX_DOMAIN(v)) {
                /* This code may be called when remapping the shared_info
                   and grant_table shared pages from
                   guest_physmap_remove_page() in arch_memory_op()
                   XENMEM_add_to_physmap to realize the PV-on-HVM feature. */
                /* FIXME: This is not SMP-safe yet about p2m table */
                /* Purge vTLB for VT-i domain */
                thash_purge_all(v);
        }
        else {
                /* First VCPU tlb. */
                vcpu_purge_tr_entry(&PSCBX(v,dtlb));
                vcpu_purge_tr_entry(&PSCBX(v,itlb));
                smp_mb();

                /* Then VHPT. */
                if (HAS_PERVCPU_VHPT(v->domain))
                        vcpu_vhpt_flush(v);
                else
                        local_vhpt_flush();
                smp_mb();

                /* Then mTLB. */
                local_flush_tlb_all();
        }

        /* We could clear the bit in d->domain_dirty_cpumask only if domain
           d is not running on this processor. There is currently no easy
           way to check this. */

        perfc_incr(vcpu_flush_vtlb_all);
}

static void __vcpu_flush_vtlb_all(void *vcpu)
{
        vcpu_flush_vtlb_all((struct vcpu*)vcpu);
}

// The caller must have incremented the reference count of d somehow.
void domain_flush_vtlb_all(struct domain* d)
{
        int cpu = smp_processor_id ();
        struct vcpu *v;

        for_each_vcpu(d, v) {
                if (!v->is_initialised)
                        continue;

                if (v->processor == cpu)
                        vcpu_flush_vtlb_all(v);
                else
                        // SMP: it is racy to reference v->processor.
                        // The vcpu scheduler may move this vcpu to another
                        // physical processor, and change the value using a
                        // plain store.
                        // We may be seeing the old value of it.
                        // In such a case, flush_vtlb_for_context_switch()
                        // takes care of the mTLB flush.
                        smp_call_function_single(v->processor,
                                                 __vcpu_flush_vtlb_all,
                                                 v, 1, 1);
        }
        perfc_incr(domain_flush_vtlb_all);
}

// Callers may need to call smp_mb() before/after calling this.
// Be careful.
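/*
 * ia64_thash() hashes against the current vcpu's PTA, so the entry offset
 * is computed relative to the current VHPT base and then rebased onto the
 * target VHPT, which may belong to another cpu or vcpu.
 */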
static void
__flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range)
{
        void *vhpt_base = __va(vhpt_maddr);
        u64 pgsz = 1L << current->arch.vhpt_pg_shift;
        u64 purge_addr = vadr & PAGE_MASK;

        addr_range += vadr - purge_addr;
        addr_range = PAGE_ALIGN(addr_range);
        while ((long)addr_range > 0) {
                /* Get the VHPT entry. */
                unsigned int off = ia64_thash(purge_addr) -
                        __va_ul(vcpu_vhpt_maddr(current));
                struct vhpt_lf_entry *v = vhpt_base + off;
                v->ti_tag = INVALID_TI_TAG;
                addr_range -= pgsz;
                purge_addr += pgsz;
        }
}

static void
cpu_flush_vhpt_range(int cpu, u64 vadr, u64 addr_range)
{
        __flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range);
}

static void
vcpu_flush_vhpt_range(struct vcpu* v, u64 vadr, u64 addr_range)
{
        __flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range);
}

void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
{
        if (HAS_PERVCPU_VHPT(current->domain))
                vcpu_flush_vhpt_range(current, vadr, 1UL << log_range);
        else
                cpu_flush_vhpt_range(current->processor,
                                     vadr, 1UL << log_range);
        ia64_ptcl(vadr, log_range << 2);
        ia64_srlz_i();
        perfc_incr(vcpu_flush_tlb_vhpt_range);
}

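/*
 * Flush a virtual address range for a whole domain: purge every vcpu's
 * software TC entries, invalidate the covering VHPT entries (per vcpu or
 * per physical cpu), then issue a global ptc.ga purge.
 */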
void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range)
{
        struct vcpu *v;

#if 0
        // this only seems to occur at shutdown, but it does occur
        if ((!addr_range) || addr_range & (addr_range - 1)) {
                printk("vhpt_flush_address: weird range, spinning...\n");
                while(1);
        }
#endif

        domain_purge_swtc_entries(d);
        smp_mb();

        for_each_vcpu (d, v) {
                if (!v->is_initialised)
                        continue;

                if (HAS_PERVCPU_VHPT(d)) {
                        vcpu_flush_vhpt_range(v, vadr, addr_range);
                } else {
                        // SMP: it is racy to reference v->processor.
                        // The vcpu scheduler may move this vcpu to another
                        // physical processor, and change the value using a
                        // plain store.
                        // We may be seeing the old value of it.
                        // In such a case, flush_vtlb_for_context_switch()
                        // takes care of the mTLB flush.
                        /* Invalidate VHPT entries. */
                        cpu_flush_vhpt_range(v->processor, vadr, addr_range);
                }
        }
        // ptc.ga has release semantics.

        /* ptc.ga */
        platform_global_tlb_purge(vadr, vadr + addr_range,
                                  current->arch.vhpt_pg_shift);
        perfc_incr(domain_flush_vtlb_range);
}

#ifdef CONFIG_XEN_IA64_TLB_TRACK
#include <asm/tlb_track.h>
#include <asm/vmx_vcpu.h>

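/*
 * Flush the translations recorded in one tlb_track_entry (typically a
 * grant-mapped page): only the VHPTs of the vcpus/cpus marked dirty in the
 * entry are touched, and the machine TLB is purged locally with ptc.l when
 * every dirty vcpu is the current one, globally with ptc.ga otherwise.
 */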
void
__domain_flush_vtlb_track_entry(struct domain* d,
                                const struct tlb_track_entry* entry)
{
        unsigned long rr7_rid;
        int swap_rr0 = 0;
        unsigned long old_rid;
        unsigned long vaddr = entry->vaddr;
        struct vcpu* v;
        int cpu;
        int vcpu;
        int local_purge = 1;

        /* tlb insert tracking is done in PAGE_SIZE units. */
        unsigned char ps = max_t(unsigned char,
                                 current->arch.vhpt_pg_shift, PAGE_SHIFT);
        /* This case isn't supported (yet). */
        BUG_ON(current->arch.vhpt_pg_shift > PAGE_SHIFT);

        BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
        /*
         * heuristic:
         * dom0linux accesses grant mapped pages via the kernel
         * straight mapped area and it doesn't change the rr7 rid.
         * So it is likely that rr7 == entry->rid, so that
         * we can avoid a rid change.
         * When blktap is supported, this heuristic should be revised.
         */
        vcpu_get_rr(current, VRN7 << VRN_SHIFT, &rr7_rid);
        if (likely(rr7_rid == entry->rid)) {
                perfc_incr(tlb_track_use_rr7);
        } else {
                swap_rr0 = 1;
                vaddr = (vaddr << 3) >> 3; // force vrn0
                perfc_incr(tlb_track_swap_rr0);
        }

        // tlb_track_entry_printf(entry);
        if (swap_rr0) {
                vcpu_get_rr(current, 0, &old_rid);
                vcpu_set_rr(current, 0, entry->rid);
        }

        if (HAS_PERVCPU_VHPT(d)) {
                for_each_vcpu_mask(vcpu, entry->vcpu_dirty_mask) {
                        v = d->vcpu[vcpu];
                        if (!v->is_initialised)
                                continue;

                        /* Invalidate VHPT entries. */
                        vcpu_flush_vhpt_range(v, vaddr, 1L << ps);

                        /*
                         * current->processor == v->processor
                         * is racy. We may see an old v->processor, and a
                         * new physical processor of v might see an old
                         * vhpt entry and insert a tlb entry.
                         */
                        if (v != current)
                                local_purge = 0;
                }
        } else {
                for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
                        /* Invalidate VHPT entries. */
                        cpu_flush_vhpt_range(cpu, vaddr, 1L << ps);

                        if (d->vcpu[cpu] != current)
                                local_purge = 0;
                }
        }

        /* ptc.ga */
        if (local_purge) {
                ia64_ptcl(vaddr, ps << 2);
                perfc_incr(domain_flush_vtlb_local);
        } else {
                /* ptc.ga has release semantics. */
                platform_global_tlb_purge(vaddr, vaddr + (1L << ps), ps);
                perfc_incr(domain_flush_vtlb_global);
        }

        if (swap_rr0) {
                vcpu_set_rr(current, 0, old_rid);
        }
        perfc_incr(domain_flush_vtlb_track_entry);
}

void
domain_flush_vtlb_track_entry(struct domain* d,
                              const struct tlb_track_entry* entry)
{
        domain_purge_swtc_entries_vcpu_dirty_mask(d, entry->vcpu_dirty_mask);
        smp_mb();

        __domain_flush_vtlb_track_entry(d, entry);
}
#endif

static void flush_tlb_vhpt_all (struct domain *d)
{
        /* First VHPT. */
        local_vhpt_flush ();

        /* Then mTLB. */
        local_flush_tlb_all ();
}

void domain_flush_tlb_vhpt(struct domain *d)
{
        /* Very heavy... */
        if (HAS_PERVCPU_VHPT(d) || is_hvm_domain(d))
                on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
        else
                on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
        cpus_clear (d->domain_dirty_cpumask);
}

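/*
 * Flush the given set of physical cpus: the local cpu (if in the mask) is
 * handled directly and dropped from the mask, then each remaining cpu is
 * IPIed.  flush_tlb_vhpt_all() ignores its domain argument, so NULL is
 * passed here.
 */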
void flush_tlb_mask(cpumask_t mask)
{
        int cpu;

        cpu = smp_processor_id();
        if (cpu_isset (cpu, mask)) {
                cpu_clear(cpu, mask);
                flush_tlb_vhpt_all (NULL);
        }

        if (cpus_empty(mask))
                return;

        for_each_cpu_mask (cpu, mask)
                smp_call_function_single
                        (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1);
}

#ifdef PERF_COUNTERS
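/*
 * Walk each present cpu's VHPT and record how many entries currently hold
 * a valid tag, for the perf counter interface.
 */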
void gather_vhpt_stats(void)
{
        int i, cpu;

        perfc_set(vhpt_nbr_entries, VHPT_NUM_ENTRIES);

        for_each_present_cpu (cpu) {
                struct vhpt_lf_entry *v = __va(per_cpu(vhpt_paddr, cpu));
                unsigned long vhpt_valid = 0;

                for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
                        if (!(v->ti_tag & INVALID_TI_TAG))
                                vhpt_valid++;
                per_cpu(perfcounters, cpu)[PERFC_vhpt_valid_entries] = vhpt_valid;
        }
}
#endif