ia64/xen-unstable

xen/arch/ia64/xen/vhpt.c @ 15892:b2a02f7ed849

[IA64] Make use of PAGE_MASK and PAGE_ALIGN()

As suggested by Isaku Yamahata

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
author Alex Williamson <alex.williamson@hp.com>
date Mon Sep 17 13:38:25 2007 -0600 (2007-09-17)
parents fdd298b75fb5
children 3ad0080ad9b5
/*
 * Initialize VHPT support.
 *
 * Copyright (C) 2004 Hewlett-Packard Co
 *      Dan Magenheimer <dan.magenheimer@hp.com>
 *
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *                    per vcpu vhpt support
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/vhpt.h>
#include <asm/vcpu.h>
#include <asm/vcpumask.h>
#include <asm/vmmu.h>

DEFINE_PER_CPU (unsigned long, vhpt_paddr);
DEFINE_PER_CPU (unsigned long, vhpt_pend);
#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
DEFINE_PER_CPU(volatile u32, vhpt_tlbflush_timestamp);
#endif
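
/* Invalidate every entry of the VHPT at machine address vhpt_maddr. */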
static void
__vhpt_flush(unsigned long vhpt_maddr)
{
        struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
        int i;

        for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
                v->ti_tag = INVALID_TI_TAG;
}

void
local_vhpt_flush(void)
{
        /* increment flush clock before flush */
        u32 flush_time = tlbflush_clock_inc_and_return();
        __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
        /* this must be after flush */
        tlbflush_update_time(&__get_cpu_var(vhpt_tlbflush_timestamp),
                             flush_time);
        perfc_incr(local_vhpt_flush);
}
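
/* Invalidate all entries of vcpu v's per-vcpu VHPT. */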
void
vcpu_vhpt_flush(struct vcpu* v)
{
        __vhpt_flush(vcpu_vhpt_maddr(v));
        perfc_incr(vcpu_vhpt_flush);
}
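
/* Clear every field of every entry in the VHPT at vhpt_maddr and mark
 * all tags invalid; used when a VHPT is first set up. */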
static void
vhpt_erase(unsigned long vhpt_maddr)
{
        struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
        int i;

        for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
                v->itir = 0;
                v->CChain = 0;
                v->page_flags = 0;
                v->ti_tag = INVALID_TI_TAG;
        }
        // initialize cache too???
}

void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long itir)
{
        struct vhpt_lf_entry *vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
        unsigned long tag = ia64_ttag (vadr);

        /* Even though VHPT is per VCPU, still need to first disable the entry,
         * because the processor may support speculative VHPT walk.  */
        vlfe->ti_tag = INVALID_TI_TAG;
        wmb();
        vlfe->itir = itir;
        vlfe->page_flags = pte | _PAGE_P;
        *(volatile unsigned long*)&vlfe->ti_tag = tag;
}
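
/* Insert a mapping described by (pte, itir) whose page size may be larger
 * than the VHPT page size: one VHPT entry is inserted for each
 * vhpt_pg_shift-sized piece of the range. */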
void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte,
                          unsigned long itir)
{
        unsigned char ps = current->arch.vhpt_pg_shift;
        ia64_itir_t _itir = {.itir = itir};
        unsigned long mask = (1L << _itir.ps) - 1;
        int i;

        if (_itir.ps - ps > 10 && !running_on_sim) {
                // if this happens, we may want to revisit this algorithm
                panic("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
        }
        if (_itir.ps - ps > 2) {
                // FIXME: Should add counter here to see how often this
                // happens (e.g. for 16MB pages!) and determine if it
                // is a performance problem.  On a quick look, it takes
                // about 39000 instrs for a 16MB page and it seems to occur
                // only a few times/second, so OK for now.
                // An alternate solution would be to just insert the one
                // 16KB in the vhpt (but with the full mapping)?
                //printk("vhpt_multiple_insert: logps-PAGE_SHIFT==%d,"
                //       "va=%p, pa=%p, pa-masked=%p\n",
                //       logps-PAGE_SHIFT,vaddr,pte&_PFN_MASK,
                //       (pte&_PFN_MASK)&~mask);
        }
        vaddr &= ~mask;
        pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
        for (i = 1L << (_itir.ps - ps); i > 0; i--) {
                vhpt_insert(vaddr, pte, _itir.itir);
                vaddr += (1L << ps);
        }
}
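
/* Allocate and erase the calling physical cpu's VHPT at boot and record
 * its physical start/end addresses in the per-cpu variables above. */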
void __init vhpt_init(void)
{
        unsigned long paddr;
        struct page_info *page;
#if !VHPT_ENABLED
        return;
#endif
        /* This allocation only holds if the vhpt table is shared by all
         * domains.  Otherwise a new vhpt table should be allocated from the
         * domain heap when each domain is created.  Assume the xen buddy
         * allocator can provide a naturally aligned page of this order?
         */
        page = alloc_domheap_pages(NULL, VHPT_SIZE_LOG2 - PAGE_SHIFT, 0);
        if (!page)
                panic("vhpt_init: can't allocate VHPT!\n");
        paddr = page_to_maddr(page);
        if (paddr & ((1 << VHPT_SIZE_LOG2) - 1))
                panic("vhpt_init: bad VHPT alignment!\n");
        __get_cpu_var(vhpt_paddr) = paddr;
        __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
        printk(XENLOG_DEBUG "vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
               paddr, __get_cpu_var(vhpt_pend));
        vhpt_erase(paddr);
        // we don't enable VHPT here.
        // context_switch() or schedule_tail() does it.
}

#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
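/* Allocate, align-check and erase a per-vcpu VHPT for v, and prime v's
 * PTA value so the VHPT walker is enabled in long format. */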
int
pervcpu_vhpt_alloc(struct vcpu *v)
{
        unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;

        v->arch.vhpt_entries =
                (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
        v->arch.vhpt_page =
                alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0);
        if (!v->arch.vhpt_page)
                return -ENOMEM;

        v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page);
        if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1))
                panic("pervcpu_vhpt_init: bad VHPT alignment!\n");

        v->arch.pta.val = 0; // to zero reserved bits
        v->arch.pta.ve = 1; // enable vhpt
        v->arch.pta.size = VHPT_SIZE_LOG2;
        v->arch.pta.vf = 1; // long format
        v->arch.pta.base = __va_ul(v->arch.vhpt_maddr) >> 15;

        vhpt_erase(v->arch.vhpt_maddr);
        smp_mb(); // per vcpu vhpt may be used by another physical cpu.
        return 0;
}

void
pervcpu_vhpt_free(struct vcpu *v)
{
        if (likely(v->arch.vhpt_page != NULL))
                free_domheap_pages(v->arch.vhpt_page,
                                   VHPT_SIZE_LOG2 - PAGE_SHIFT);
}
#endif
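
/* Purge the software TC entries (dtlb/itlb) of every initialised vcpu of
 * domain d. */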
void
domain_purge_swtc_entries(struct domain *d)
{
        struct vcpu* v;
        for_each_vcpu(d, v) {
                if (!v->is_initialised)
                        continue;

                /* Purge TC entries.
                   FIXME: clear only if match.  */
                vcpu_purge_tr_entry(&PSCBX(v,dtlb));
                vcpu_purge_tr_entry(&PSCBX(v,itlb));
        }
}

void
domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d,
                                          vcpumask_t vcpu_dirty_mask)
{
        int vcpu;

        for_each_vcpu_mask(vcpu, vcpu_dirty_mask) {
                struct vcpu* v = d->vcpu[vcpu];
                if (!v->is_initialised)
                        continue;

                /* Purge TC entries.
                   FIXME: clear only if match.  */
                vcpu_purge_tr_entry(&PSCBX(v, dtlb));
                vcpu_purge_tr_entry(&PSCBX(v, itlb));
        }
}

// SMP: we can't assume v == current, the vcpu might move to another
// physical cpu, so a memory barrier is necessary.
// If we can guarantee that the vcpu can run on only this physical cpu
// (e.g. vcpu == current), smp_mb() is unnecessary.
void vcpu_flush_vtlb_all(struct vcpu *v)
{
        if (VMX_DOMAIN(v)) {
                /* This code may be called for remapping the shared_info and
                   grant_table shared page from guest_physmap_remove_page()
                   in arch_memory_op() XENMEM_add_to_physmap to realize
                   the PV-on-HVM feature.  */
                /* FIXME: This is not SMP-safe yet about p2m table */
                /* Purge vTLB for VT-i domain */
                thash_purge_all(v);
        }
        else {
                /* First VCPU tlb.  */
                vcpu_purge_tr_entry(&PSCBX(v,dtlb));
                vcpu_purge_tr_entry(&PSCBX(v,itlb));
                smp_mb();

                /* Then VHPT.  */
                if (HAS_PERVCPU_VHPT(v->domain))
                        vcpu_vhpt_flush(v);
                else
                        local_vhpt_flush();
                smp_mb();

                /* Then mTLB.  */
                local_flush_tlb_all();
        }

        /* We could clear the bit in d->domain_dirty_cpumask only if domain d
           is not running on this processor.  There is currently no easy way
           to check this.  */

        perfc_incr(vcpu_flush_vtlb_all);
}

static void __vcpu_flush_vtlb_all(void *vcpu)
{
        vcpu_flush_vtlb_all((struct vcpu*)vcpu);
}

// The caller must have incremented the reference count of d somehow.
void domain_flush_vtlb_all(struct domain* d)
{
        int cpu = smp_processor_id ();
        struct vcpu *v;

        for_each_vcpu(d, v) {
                if (!v->is_initialised)
                        continue;

                if (v->processor == cpu)
                        vcpu_flush_vtlb_all(v);
                else
                        // SMP: it is racy to reference v->processor.
                        // vcpu scheduler may move this vcpu to another
                        // physical processor, and change the value
                        // using plain store.
                        // We may be seeing the old value of it.
                        // In such case, flush_vtlb_for_context_switch()
                        // takes care of mTLB flush.
                        smp_call_function_single(v->processor,
                                                 __vcpu_flush_vtlb_all,
                                                 v, 1, 1);
        }
        perfc_incr(domain_flush_vtlb_all);
}

// Callers may need to call smp_mb() before/after calling this.
// Be careful.
static void
__flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range)
{
        void *vhpt_base = __va(vhpt_maddr);
        u64 pgsz = 1L << current->arch.vhpt_pg_shift;
        u64 purge_addr = vadr & PAGE_MASK;

        addr_range += vadr - purge_addr;
        addr_range = PAGE_ALIGN(addr_range);
        while ((long)addr_range > 0) {
                /* Get the VHPT entry.  */
                unsigned int off = ia64_thash(purge_addr) -
                        __va_ul(vcpu_vhpt_maddr(current));
                struct vhpt_lf_entry *v = vhpt_base + off;
                v->ti_tag = INVALID_TI_TAG;
                addr_range -= pgsz;
                purge_addr += pgsz;
        }
}

static void
cpu_flush_vhpt_range(int cpu, u64 vadr, u64 addr_range)
{
        __flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range);
}

static void
vcpu_flush_vhpt_range(struct vcpu* v, u64 vadr, u64 addr_range)
{
        __flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range);
}
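
/* Flush the VHPT (per-vcpu or per-physical-cpu, as configured) and the
 * local processor TLB for the range [vadr, vadr + 2^log_range) of the
 * current vcpu. */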
void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
{
        if (HAS_PERVCPU_VHPT(current->domain))
                vcpu_flush_vhpt_range(current, vadr, 1UL << log_range);
        else
                cpu_flush_vhpt_range(current->processor,
                                     vadr, 1UL << log_range);
        ia64_ptcl(vadr, log_range << 2);
        ia64_srlz_i();
        perfc_incr(vcpu_flush_tlb_vhpt_range);
}
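
/* Flush the software TCs, the VHPTs of all initialised vcpus and, via
 * ptc.ga, the machine TLBs for the range [vadr, vadr + addr_range) of
 * domain d. */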
void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range)
{
        struct vcpu *v;

#if 0
        // this only seems to occur at shutdown, but it does occur
        if ((!addr_range) || addr_range & (addr_range - 1)) {
                printk("vhpt_flush_address: weird range, spinning...\n");
                while(1);
        }
#endif

        domain_purge_swtc_entries(d);
        smp_mb();

        for_each_vcpu (d, v) {
                if (!v->is_initialised)
                        continue;

                if (HAS_PERVCPU_VHPT(d)) {
                        vcpu_flush_vhpt_range(v, vadr, addr_range);
                } else {
                        // SMP: it is racy to reference v->processor.
                        // vcpu scheduler may move this vcpu to another
                        // physical processor, and change the value
                        // using plain store.
                        // We may be seeing the old value of it.
                        // In such case, flush_vtlb_for_context_switch()
                        // takes care of mTLB flush.
                        /* Invalidate VHPT entries.  */
                        cpu_flush_vhpt_range(v->processor, vadr, addr_range);
                }
        }
        // ptc.ga has release semantics.

        /* ptc.ga */
        platform_global_tlb_purge(vadr, vadr + addr_range,
                                  current->arch.vhpt_pg_shift);
        perfc_incr(domain_flush_vtlb_range);
}

#ifdef CONFIG_XEN_IA64_TLB_TRACK
#include <asm/tlb_track.h>
#include <asm/vmx_vcpu.h>
void
__domain_flush_vtlb_track_entry(struct domain* d,
                                const struct tlb_track_entry* entry)
{
        unsigned long rr7_rid;
        int swap_rr0 = 0;
        unsigned long old_rid;
        unsigned long vaddr = entry->vaddr;
        struct vcpu* v;
        int cpu;
        int vcpu;
        int local_purge = 1;

        /* TLB insert tracking is done in PAGE_SIZE units.  */
        unsigned char ps = max_t(unsigned char,
                                 current->arch.vhpt_pg_shift, PAGE_SHIFT);
        /* This case isn't supported (yet).  */
        BUG_ON(current->arch.vhpt_pg_shift > PAGE_SHIFT);

        BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
        /*
         * heuristic:
         * dom0linux accesses grant mapped pages via the kernel
         * straight mapped area and it doesn't change rr7 rid.
         * So it is likely that rr7 == entry->rid, so that
         * we can avoid the rid change.
         * When blktap is supported, this heuristic should be revised.
         */
        vcpu_get_rr(current, VRN7 << VRN_SHIFT, &rr7_rid);
        if (likely(rr7_rid == entry->rid)) {
                perfc_incr(tlb_track_use_rr7);
        } else {
                swap_rr0 = 1;
                vaddr = (vaddr << 3) >> 3;// force vrn0
                perfc_incr(tlb_track_swap_rr0);
        }

        // tlb_track_entry_printf(entry);
        if (swap_rr0) {
                vcpu_get_rr(current, 0, &old_rid);
                vcpu_set_rr(current, 0, entry->rid);
        }

        if (HAS_PERVCPU_VHPT(d)) {
                for_each_vcpu_mask(vcpu, entry->vcpu_dirty_mask) {
                        v = d->vcpu[vcpu];
                        if (!v->is_initialised)
                                continue;

                        /* Invalidate VHPT entries.  */
                        vcpu_flush_vhpt_range(v, vaddr, 1L << ps);

                        /*
                         * current->processor == v->processor
                         * is racy. we may see old v->processor and
                         * a new physical processor of v might see old
                         * vhpt entry and insert tlb.
                         */
                        if (v != current)
                                local_purge = 0;
                }
        } else {
                for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
                        /* Invalidate VHPT entries.  */
                        cpu_flush_vhpt_range(cpu, vaddr, 1L << ps);

                        if (d->vcpu[cpu] != current)
                                local_purge = 0;
                }
        }

        /* ptc.ga */
        if (local_purge) {
                ia64_ptcl(vaddr, ps << 2);
                perfc_incr(domain_flush_vtlb_local);
        } else {
                /* ptc.ga has release semantics.  */
                platform_global_tlb_purge(vaddr, vaddr + (1L << ps), ps);
                perfc_incr(domain_flush_vtlb_global);
        }

        if (swap_rr0) {
                vcpu_set_rr(current, 0, old_rid);
        }
        perfc_incr(domain_flush_vtlb_track_entry);
}

void
domain_flush_vtlb_track_entry(struct domain* d,
                              const struct tlb_track_entry* entry)
{
        domain_purge_swtc_entries_vcpu_dirty_mask(d, entry->vcpu_dirty_mask);
        smp_mb();

        __domain_flush_vtlb_track_entry(d, entry);
}

#endif
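
/* Flush this physical cpu's VHPT and then its machine TLB; the domain
 * argument is unused. */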
static void flush_tlb_vhpt_all (struct domain *d)
{
        /* First VHPT.  */
        local_vhpt_flush ();

        /* Then mTLB.  */
        local_flush_tlb_all ();
}

void domain_flush_tlb_vhpt(struct domain *d)
{
        /* Very heavy...  */
        if (HAS_PERVCPU_VHPT(d) || d->arch.is_vti)
                on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
        else
                on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
        cpus_clear (d->domain_dirty_cpumask);
}
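
/* Flush the VHPT and machine TLB of every physical cpu in mask, handling
 * the local cpu directly and the others via IPI. */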
void flush_tlb_mask(cpumask_t mask)
{
        int cpu;

        cpu = smp_processor_id();
        if (cpu_isset (cpu, mask)) {
                cpu_clear(cpu, mask);
                flush_tlb_vhpt_all (NULL);
        }

        if (cpus_empty(mask))
                return;

        for_each_cpu_mask (cpu, mask)
                smp_call_function_single
                        (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1);
}

#ifdef PERF_COUNTERS
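/* Walk every present cpu's VHPT and record the number of valid entries
 * in the per-cpu perf counters. */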
void gather_vhpt_stats(void)
{
        int i, cpu;

        perfc_set(vhpt_nbr_entries, VHPT_NUM_ENTRIES);

        for_each_present_cpu (cpu) {
                struct vhpt_lf_entry *v = __va(per_cpu(vhpt_paddr, cpu));
                unsigned long vhpt_valid = 0;

                for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
                        if (!(v->ti_tag & INVALID_TI_TAG))
                                vhpt_valid++;
                per_cpu(perfcounters, cpu)[PERFC_vhpt_valid_entries] = vhpt_valid;
        }
}
#endif