view xen/arch/ia64/xen/vhpt.c @ 11985:c5ddcf89f050

[IA64] Add not-SMP-safe comment about PV-on-HVM

Signed-off-by: Tsunehisa Doi <Doi.Tsunehisa@jp.fujitsu.com>
author awilliam@xenbuild.aw
date Tue Oct 17 14:30:36 2006 -0600 (2006-10-17)
parents de50245ad4e3
children 78c494a16b95
line source
/*
 * Initialize VHPT support.
 *
 * Copyright (C) 2004 Hewlett-Packard Co
 *      Dan Magenheimer <dan.magenheimer@hp.com>
 *
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *                    per vcpu vhpt support
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/vhpt.h>
#include <asm/vcpu.h>
#include <asm/vcpumask.h>
#include <asm/vmmu.h>

/* Defined in tlb.c  */
extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits);

extern long running_on_sim;

DEFINE_PER_CPU (unsigned long, vhpt_paddr);
DEFINE_PER_CPU (unsigned long, vhpt_pend);
#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
DEFINE_PER_CPU(volatile u32, vhpt_tlbflush_timestamp);
#endif
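
/*
 * Invalidate every entry of the VHPT at vhpt_maddr by overwriting its tag.
 * The local_/vcpu_ wrappers below also tick the TLB-flush clock and record
 * the flush time, so other code can tell whether a flush has happened since
 * a given timestamp.
 */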
static void
__vhpt_flush(unsigned long vhpt_maddr)
{
        struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
        int i;

        for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
                v->ti_tag = INVALID_TI_TAG;
}
void
local_vhpt_flush(void)
{
        /* increment flush clock before flush */
        u32 flush_time = tlbflush_clock_inc_and_return();
        __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
        /* this must be after flush */
        tlbflush_update_time(&__get_cpu_var(vhpt_tlbflush_timestamp),
                             flush_time);
        perfc_incrc(local_vhpt_flush);
}
void
vcpu_vhpt_flush(struct vcpu* v)
{
        /* increment flush clock before flush */
        u32 flush_time = tlbflush_clock_inc_and_return();
        __vhpt_flush(vcpu_vhpt_maddr(v));
        /* this must be after flush */
        tlbflush_update_time(&v->arch.tlbflush_timestamp, flush_time);
        perfc_incrc(vcpu_vhpt_flush);
}
static void
vhpt_erase(unsigned long vhpt_maddr)
{
        struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
        int i;

        for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
                v->itir = 0;
                v->CChain = 0;
                v->page_flags = 0;
                v->ti_tag = INVALID_TI_TAG;
        }
        // initialize cache too???
}
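
/*
 * Fill the long-format VHPT entry that vadr hashes to.  ia64_thash() and
 * ia64_ttag() use the current PTA, so this writes into whichever VHPT is
 * live on this logical processor.
 */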
void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps)
{
        struct vhpt_lf_entry *vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
        unsigned long tag = ia64_ttag (vadr);

        /* No need to first disable the entry, since VHPT is per LP
           and VHPT is TR mapped.  */
        vlfe->itir = logps;
        vlfe->page_flags = pte | _PAGE_P;
        vlfe->ti_tag = tag;
}
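
/*
 * Insert a mapping of 2^logps bytes as one VHPT entry per PAGE_SIZE page.
 * Each entry still carries the full superpage size in its itir; only the
 * hashed slots differ.
 */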
void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, unsigned long logps)
{
        unsigned long mask = (1L << logps) - 1;
        int i;

        if (logps-PAGE_SHIFT > 10 && !running_on_sim) {
                // if this happens, we may want to revisit this algorithm
                panic("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
        }
        if (logps-PAGE_SHIFT > 2) {
                // FIXME: Should add counter here to see how often this
                // happens (e.g. for 16MB pages!) and determine if it
                // is a performance problem.  On a quick look, it takes
                // about 39000 instrs for a 16MB page and it seems to occur
                // only a few times/second, so OK for now.
                // An alternate solution would be to just insert the one
                // 16KB in the vhpt (but with the full mapping)?
                //printf("vhpt_multiple_insert: logps-PAGE_SHIFT==%d,"
                //       "va=%p, pa=%p, pa-masked=%p\n",
                //       logps-PAGE_SHIFT,vaddr,pte&_PFN_MASK,
                //       (pte&_PFN_MASK)&~mask);
        }
        vaddr &= ~mask;
        pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
        for (i = 1L << (logps-PAGE_SHIFT); i > 0; i--) {
                vhpt_insert(vaddr, pte, logps << 2);
                vaddr += PAGE_SIZE;
        }
}
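
/*
 * Boot-time allocation of this physical CPU's VHPT: VHPT_SIZE_LOG2 bytes,
 * naturally aligned, taken from the domain heap.  The table is cleared here
 * but only enabled later, from the context switch path.
 */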
void vhpt_init(void)
{
        unsigned long paddr;
        struct page_info *page;
#if !VHPT_ENABLED
        return;
#endif
        /* This allocation only holds true if vhpt table is unique for
         * all domains. Or else later new vhpt table should be allocated
         * from domain heap when each domain is created. Assume xen buddy
         * allocator can provide natural aligned page by order?
         */
        page = alloc_domheap_pages(NULL, VHPT_SIZE_LOG2 - PAGE_SHIFT, 0);
        if (!page)
                panic("vhpt_init: can't allocate VHPT!\n");
        paddr = page_to_maddr(page);
        if (paddr & ((1 << VHPT_SIZE_LOG2) - 1))
                panic("vhpt_init: bad VHPT alignment!\n");
        __get_cpu_var(vhpt_paddr) = paddr;
        __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
        printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
               paddr, __get_cpu_var(vhpt_pend));
        vhpt_erase(paddr);
        // we don't enable VHPT here.
        // context_switch() or schedule_tail() does it.
}
#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
int
pervcpu_vhpt_alloc(struct vcpu *v)
{
        unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;

        v->arch.vhpt_entries =
                (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
        v->arch.vhpt_page =
                alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0);
        if (!v->arch.vhpt_page)
                return -ENOMEM;

        v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page);
        if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1))
                panic("pervcpu_vhpt_init: bad VHPT alignment!\n");

        v->arch.pta.val = 0; // to zero reserved bits
        v->arch.pta.ve = 1; // enable vhpt
        v->arch.pta.size = VHPT_SIZE_LOG2;
        v->arch.pta.vf = 1; // long format
        v->arch.pta.base = __va_ul(v->arch.vhpt_maddr) >> 15;

        vhpt_erase(v->arch.vhpt_maddr);
        smp_mb(); // per vcpu vhpt may be used by another physical cpu.
        return 0;
}

void
pervcpu_vhpt_free(struct vcpu *v)
{
        free_domheap_pages(v->arch.vhpt_page, VHPT_SIZE_LOG2 - PAGE_SHIFT);
}
#endif
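
/*
 * Drop the software TC entries (the dtlb/itlb entries cached in each vcpu's
 * PSCB area) for every initialised vcpu of the domain, or only for the
 * vcpus named in the dirty mask in the variant below.
 */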
void
domain_purge_swtc_entries(struct domain *d)
{
        struct vcpu* v;
        for_each_vcpu(d, v) {
                if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
                        continue;

                /* Purge TC entries.
                   FIXME: clear only if match.  */
                vcpu_purge_tr_entry(&PSCBX(v,dtlb));
                vcpu_purge_tr_entry(&PSCBX(v,itlb));
        }
}

void
domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d,
                                          vcpumask_t vcpu_dirty_mask)
{
        int vcpu;

        for_each_vcpu_mask(vcpu, vcpu_dirty_mask) {
                struct vcpu* v = d->vcpu[vcpu];
                if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
                        continue;

                /* Purge TC entries.
                   FIXME: clear only if match.  */
                vcpu_purge_tr_entry(&PSCBX(v, dtlb));
                vcpu_purge_tr_entry(&PSCBX(v, itlb));
        }
}
// SMP: we can't assume v == current, the vcpu might move to another
// physical cpu, so a memory barrier is necessary.
// If we can guarantee that the vcpu can run on only this physical cpu
// (e.g. vcpu == current), smp_mb() is unnecessary.
void vcpu_flush_vtlb_all(struct vcpu *v)
{
        if (VMX_DOMAIN(v)) {
                /* This code may be called for remapping the shared_info and
                   grant_table shared pages from guest_physmap_remove_page()
                   in arch_memory_op() XENMEM_add_to_physmap to realize the
                   PV-on-HVM feature. */
                /* FIXME: This is not SMP-safe yet about the p2m table */
                /* Purge vTLB for VT-i domain */
                thash_purge_all(v);
        }
        else {
                /* First VCPU tlb.  */
                vcpu_purge_tr_entry(&PSCBX(v,dtlb));
                vcpu_purge_tr_entry(&PSCBX(v,itlb));
                smp_mb();

                /* Then VHPT.  */
                if (HAS_PERVCPU_VHPT(v->domain))
                        vcpu_vhpt_flush(v);
                else
                        local_vhpt_flush();
                smp_mb();

                /* Then mTLB.  */
                local_flush_tlb_all();
        }

        /* We could clear the bit in d->domain_dirty_cpumask only if domain d
           is not running on this processor.  There is currently no easy way
           to check this.  */

        perfc_incrc(vcpu_flush_vtlb_all);
}
static void __vcpu_flush_vtlb_all(void *vcpu)
{
        vcpu_flush_vtlb_all((struct vcpu*)vcpu);
}

void domain_flush_vtlb_all (void)
{
        int cpu = smp_processor_id ();
        struct vcpu *v;

        for_each_vcpu (current->domain, v) {
                if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
                        continue;

                if (v->processor == cpu)
                        vcpu_flush_vtlb_all(v);
                else
                        // SMP: it is racy to reference v->processor.
                        // The vcpu scheduler may move this vcpu to another
                        // physical processor, and change the value
                        // using a plain store.
                        // We may be seeing the old value of it.
                        // In such a case, flush_vtlb_for_context_switch()
                        // takes care of the mTLB flush.
                        smp_call_function_single(v->processor,
                                                 __vcpu_flush_vtlb_all,
                                                 v, 1, 1);
        }
        perfc_incrc(domain_flush_vtlb_all);
}
// Callers may need to call smp_mb() before/after calling this.
// Be careful.
static void
__flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range)
{
        void *vhpt_base = __va(vhpt_maddr);

        while ((long)addr_range > 0) {
                /* Get the VHPT entry.  */
                unsigned int off = ia64_thash(vadr) -
                        __va_ul(vcpu_vhpt_maddr(current));
                struct vhpt_lf_entry *v = vhpt_base + off;
                v->ti_tag = INVALID_TI_TAG;
                addr_range -= PAGE_SIZE;
                vadr += PAGE_SIZE;
        }
}

static void
cpu_flush_vhpt_range(int cpu, u64 vadr, u64 addr_range)
{
        __flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range);
}

static void
vcpu_flush_vhpt_range(struct vcpu* v, u64 vadr, u64 addr_range)
{
        __flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range);
}
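
/*
 * Flush a virtual range from the current vcpu's VHPT (per-vcpu or per-cpu,
 * as configured) and from the local processor's TLB via ptc.l.
 */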
void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
{
        if (HAS_PERVCPU_VHPT(current->domain))
                vcpu_flush_vhpt_range(current, vadr, 1UL << log_range);
        else
                cpu_flush_vhpt_range(current->processor,
                                     vadr, 1UL << log_range);
        ia64_ptcl(vadr, log_range << 2);
        ia64_srlz_i();
        perfc_incrc(vcpu_flush_tlb_vhpt_range);
}
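
/*
 * Invalidate a virtual range for a whole domain: purge the software TC
 * entries, invalidate the matching VHPT entries for every initialised vcpu,
 * then purge the machine TLB globally with ptc.ga.
 */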
void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range)
{
        struct vcpu *v;

#if 0
        // this only seems to occur at shutdown, but it does occur
        if ((!addr_range) || addr_range & (addr_range - 1)) {
                printf("vhpt_flush_address: weird range, spinning...\n");
                while(1);
        }
#endif

        domain_purge_swtc_entries(d);
        smp_mb();

        for_each_vcpu (d, v) {
                if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
                        continue;

                if (HAS_PERVCPU_VHPT(d)) {
                        vcpu_flush_vhpt_range(v, vadr, addr_range);
                } else {
                        // SMP: it is racy to reference v->processor.
                        // The vcpu scheduler may move this vcpu to another
                        // physical processor, and change the value
                        // using a plain store.
                        // We may be seeing the old value of it.
                        // In such a case, flush_vtlb_for_context_switch()
                        // takes care of the mTLB flush.
                        /* Invalidate VHPT entries.  */
                        cpu_flush_vhpt_range(v->processor, vadr, addr_range);
                }
        }
        // ptc.ga has release semantics.

        /* ptc.ga */
        ia64_global_tlb_purge(vadr, vadr + addr_range, PAGE_SHIFT);
        perfc_incrc(domain_flush_vtlb_range);
}
#ifdef CONFIG_XEN_IA64_TLB_TRACK
#include <asm/tlb_track.h>
#include <asm/vmx_vcpu.h>
void
__domain_flush_vtlb_track_entry(struct domain* d,
                                const struct tlb_track_entry* entry)
{
        unsigned long rr7_rid;
        int swap_rr0 = 0;
        unsigned long old_rid;
        unsigned long vaddr = entry->vaddr;
        struct vcpu* v;
        int cpu;
        int vcpu;
        int local_purge = 1;

        BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
        /*
         * heuristic:
         * dom0linux accesses grant mapped pages via the kernel
         * straight mapped area and it doesn't change the rr7 rid.
         * So it is likely that rr7 == entry->rid, so that
         * we can avoid the rid change.
         * When blktap is supported, this heuristic should be revised.
         */
        vcpu_get_rr(current, VRN7 << VRN_SHIFT, &rr7_rid);
        if (likely(rr7_rid == entry->rid)) {
                perfc_incrc(tlb_track_use_rr7);
        } else {
                swap_rr0 = 1;
                vaddr = (vaddr << 3) >> 3; // force vrn0
                perfc_incrc(tlb_track_swap_rr0);
        }
        // tlb_track_entry_printf(entry);
        if (swap_rr0) {
                vcpu_get_rr(current, 0, &old_rid);
                vcpu_set_rr(current, 0, entry->rid);
        }

        if (HAS_PERVCPU_VHPT(d)) {
                for_each_vcpu_mask(vcpu, entry->vcpu_dirty_mask) {
                        v = d->vcpu[vcpu];
                        if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
                                continue;

                        /* Invalidate VHPT entries.  */
                        vcpu_flush_vhpt_range(v, vaddr, PAGE_SIZE);

                        /*
                         * current->processor == v->processor
                         * is racy. we may see old v->processor and
                         * a new physical processor of v might see old
                         * vhpt entry and insert tlb.
                         */
                        if (v != current)
                                local_purge = 0;
                }
        } else {
                for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
                        /* Invalidate VHPT entries.  */
                        cpu_flush_vhpt_range(cpu, vaddr, PAGE_SIZE);

                        if (d->vcpu[cpu] != current)
                                local_purge = 0;
                }
        }

        /* ptc.ga */
        if (local_purge) {
                ia64_ptcl(vaddr, PAGE_SHIFT << 2);
                perfc_incrc(domain_flush_vtlb_local);
        } else {
                /* ptc.ga has release semantics.  */
                ia64_global_tlb_purge(vaddr, vaddr + PAGE_SIZE, PAGE_SHIFT);
                perfc_incrc(domain_flush_vtlb_global);
        }

        if (swap_rr0) {
                vcpu_set_rr(current, 0, old_rid);
        }
        perfc_incrc(domain_flush_vtlb_track_entry);
}
void
domain_flush_vtlb_track_entry(struct domain* d,
                              const struct tlb_track_entry* entry)
{
        domain_purge_swtc_entries_vcpu_dirty_mask(d, entry->vcpu_dirty_mask);
        smp_mb();

        __domain_flush_vtlb_track_entry(d, entry);
}
#endif
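
/* Flush the local processor's VHPT and then its machine TLB.  Used as the
   per-CPU worker for domain_flush_tlb_vhpt() and flush_tlb_mask() below.  */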
static void flush_tlb_vhpt_all (struct domain *d)
{
        /* First VHPT.  */
        local_vhpt_flush ();

        /* Then mTLB.  */
        local_flush_tlb_all ();
}

void domain_flush_tlb_vhpt(struct domain *d)
{
        /* Very heavy...  */
        if (HAS_PERVCPU_VHPT(d) || d->arch.is_vti)
                on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
        else
                on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
        cpus_clear (d->domain_dirty_cpumask);
}
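
/* Flush the VHPT and machine TLB of every processor in mask: the local CPU
   directly, remote CPUs one by one via smp_call_function_single().  */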
void flush_tlb_mask(cpumask_t mask)
{
        int cpu;

        cpu = smp_processor_id();
        if (cpu_isset (cpu, mask)) {
                cpu_clear(cpu, mask);
                flush_tlb_vhpt_all (NULL);
        }

        if (cpus_empty(mask))
                return;

        for_each_cpu_mask (cpu, mask)
                smp_call_function_single
                        (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1);
}
#ifdef PERF_COUNTERS
void gather_vhpt_stats(void)
{
        int i, cpu;

        perfc_set(vhpt_nbr_entries, VHPT_NUM_ENTRIES);

        for_each_present_cpu (cpu) {
                struct vhpt_lf_entry *v = __va(per_cpu(vhpt_paddr, cpu));
                unsigned long vhpt_valid = 0;

                for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
                        if (!(v->ti_tag & INVALID_TI_TAG))
                                vhpt_valid++;
                perfc_seta(vhpt_valid_entries, cpu, vhpt_valid);
        }
}
#endif