ia64/xen-unstable

view xen/arch/ia64/xen/vhpt.c @ 15423:cbf749e9961f

[IA64] Cleanup: Move is_platform_hp_ski() from xenmisc.c to xensetup.c

- only caller is start_kernel
- change to static __init
- also move running_on_sim to xensetup.c, and change it from unsigned
long to int, since it's just a boolean
- declare running_on_sim in config.h near some other externs

Tested by building, booting, starting a PV guest on rx2620.

Signed-off-by: Aron Griffis <aron@hp.com>
author Alex Williamson <alex.williamson@hp.com>
date Mon Jul 02 10:25:29 2007 -0600 (2007-07-02)
parents 034f3e20ad10
children 255abff9d1f7
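
A minimal sketch of the layout this changeset describes (illustrative only, not the
actual patch: the include list, the CPUID-based detection body, and the abridged
start_kernel() signature are assumptions; the real check lives in xensetup.c):

/* xen/include/asm-ia64/config.h: declared near the other externs. */
extern int running_on_sim;

/* xen/arch/ia64/xen/xensetup.c */
#include <linux/config.h>
#include <linux/init.h>
#include <asm/processor.h>

int running_on_sim;	/* plain int now: it only ever holds 0 or 1 */

/* The only caller is start_kernel(), so static __init suffices.
 * The detection below is a placeholder, not the real SKI check. */
static int __init is_platform_hp_ski(void)
{
	return (ia64_get_cpuid(0) & 0xffUL) == 'H';
}

void __init start_kernel(void)	/* only caller; body abridged */
{
	running_on_sim = is_platform_hp_ski();
	/* ... remainder of boot ... */
}
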
line source
1 /*
2 * Initialize VHPT support.
3 *
4 * Copyright (C) 2004 Hewlett-Packard Co
5 * Dan Magenheimer <dan.magenheimer@hp.com>
6 *
7 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
8 * VA Linux Systems Japan K.K.
9 * per vcpu vhpt support
10 */
11 #include <linux/config.h>
12 #include <linux/kernel.h>
13 #include <linux/init.h>
15 #include <asm/processor.h>
16 #include <asm/system.h>
17 #include <asm/pgalloc.h>
18 #include <asm/page.h>
19 #include <asm/vhpt.h>
20 #include <asm/vcpu.h>
21 #include <asm/vcpumask.h>
22 #include <asm/vmmu.h>
24 DEFINE_PER_CPU (unsigned long, vhpt_paddr);
25 DEFINE_PER_CPU (unsigned long, vhpt_pend);
26 #ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
27 DEFINE_PER_CPU(volatile u32, vhpt_tlbflush_timestamp);
28 #endif
30 static void
31 __vhpt_flush(unsigned long vhpt_maddr)
32 {
33 struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
34 int i;
36 for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
37 v->ti_tag = INVALID_TI_TAG;
38 }
40 void
41 local_vhpt_flush(void)
42 {
43 /* increment flush clock before flush */
44 u32 flush_time = tlbflush_clock_inc_and_return();
45 __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
46 /* this must be after flush */
47 tlbflush_update_time(&__get_cpu_var(vhpt_tlbflush_timestamp),
48 flush_time);
49 perfc_incr(local_vhpt_flush);
50 }
52 void
53 vcpu_vhpt_flush(struct vcpu* v)
54 {
55 __vhpt_flush(vcpu_vhpt_maddr(v));
56 perfc_incr(vcpu_vhpt_flush);
57 }
59 static void
60 vhpt_erase(unsigned long vhpt_maddr)
61 {
62 struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
63 int i;
65 for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
66 v->itir = 0;
67 v->CChain = 0;
68 v->page_flags = 0;
69 v->ti_tag = INVALID_TI_TAG;
70 }
71 // initialize cache too???
72 }
74 void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps)
75 {
76 struct vhpt_lf_entry *vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
77 unsigned long tag = ia64_ttag (vadr);
79 /* Even though the VHPT is per VCPU, we still need to disable the entry
80 * first, because the processor may walk the VHPT speculatively. */
81 vlfe->ti_tag = INVALID_TI_TAG;
82 wmb();
83 vlfe->itir = logps;
84 vlfe->page_flags = pte | _PAGE_P;
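/* Write the valid tag last so a hardware walker never sees a
 * matching tag paired with stale itir/page_flags. */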
85 *(volatile unsigned long*)&vlfe->ti_tag = tag;
86 }
88 void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, unsigned long logps)
89 {
90 unsigned long mask = (1L << logps) - 1;
91 int i;
93 if (logps-PAGE_SHIFT > 10 && !running_on_sim) {
94 // if this happens, we may want to revisit this algorithm
95 panic("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
96 }
97 if (logps-PAGE_SHIFT > 2) {
98 // FIXME: Should add counter here to see how often this
99 // happens (e.g. for 16MB pages!) and determine if it
100 // is a performance problem. On a quick look, it takes
101 // about 39000 instrs for a 16MB page and it seems to occur
102 // only a few times/second, so OK for now.
103 // An alternate solution would be to just insert the one
104 // 16KB in the vhpt (but with the full mapping)?
105 //printk("vhpt_multiple_insert: logps-PAGE_SHIFT==%d,"
106 //"va=%p, pa=%p, pa-masked=%p\n",
107 //logps-PAGE_SHIFT,vaddr,pte&_PFN_MASK,
108 //(pte&_PFN_MASK)&~mask);
109 }
110 vaddr &= ~mask;
111 pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
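/* Map the large page as 1 << (logps - PAGE_SHIFT) individual entries;
 * logps << 2 places the full page size into the itir.ps field (bits 7:2). */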
112 for (i = 1L << (logps-PAGE_SHIFT); i > 0; i--) {
113 vhpt_insert(vaddr,pte,logps<<2);
114 vaddr += PAGE_SIZE;
115 }
116 }
118 void __init vhpt_init(void)
119 {
120 unsigned long paddr;
121 struct page_info *page;
122 #if !VHPT_ENABLED
123 return;
124 #endif
125 /* This allocation only holds if the VHPT table is shared by all
126 * domains. Otherwise a new VHPT table should be allocated from the
127 * domain heap when each domain is created. We assume the xen buddy
128 * allocator can provide a naturally aligned page for the order.
129 */
130 page = alloc_domheap_pages(NULL, VHPT_SIZE_LOG2 - PAGE_SHIFT, 0);
131 if (!page)
132 panic("vhpt_init: can't allocate VHPT!\n");
133 paddr = page_to_maddr(page);
134 if (paddr & ((1 << VHPT_SIZE_LOG2) - 1))
135 panic("vhpt_init: bad VHPT alignment!\n");
136 __get_cpu_var(vhpt_paddr) = paddr;
137 __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
138 printk(XENLOG_DEBUG "vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
139 paddr, __get_cpu_var(vhpt_pend));
140 vhpt_erase(paddr);
141 // we don't enable VHPT here.
142 // context_switch() or schedule_tail() does it.
143 }
145 #ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
146 int
147 pervcpu_vhpt_alloc(struct vcpu *v)
148 {
149 unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
151 v->arch.vhpt_entries =
152 (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
153 v->arch.vhpt_page =
154 alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0);
155 if (!v->arch.vhpt_page)
156 return -ENOMEM;
158 v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page);
159 if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1))
160 panic("pervcpu_vhpt_init: bad VHPT alignment!\n");
162 v->arch.pta.val = 0; // to zero reserved bits
163 v->arch.pta.ve = 1; // enable vhpt
164 v->arch.pta.size = VHPT_SIZE_LOG2;
165 v->arch.pta.vf = 1; // long format
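// PTA.base holds bits 63:15 of the VHPT's virtual base address, hence the >> 15 below.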
166 v->arch.pta.base = __va_ul(v->arch.vhpt_maddr) >> 15;
168 vhpt_erase(v->arch.vhpt_maddr);
169 smp_mb(); // per vcpu vhpt may be used by another physical cpu.
170 return 0;
171 }
173 void
174 pervcpu_vhpt_free(struct vcpu *v)
175 {
176 if (likely(v->arch.vhpt_page != NULL))
177 free_domheap_pages(v->arch.vhpt_page,
178 VHPT_SIZE_LOG2 - PAGE_SHIFT);
179 }
180 #endif
182 void
183 domain_purge_swtc_entries(struct domain *d)
184 {
185 struct vcpu* v;
186 for_each_vcpu(d, v) {
187 if (!v->is_initialised)
188 continue;
190 /* Purge TC entries.
191 FIXME: clear only if match. */
192 vcpu_purge_tr_entry(&PSCBX(v,dtlb));
193 vcpu_purge_tr_entry(&PSCBX(v,itlb));
194 }
195 }
197 void
198 domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d,
199 vcpumask_t vcpu_dirty_mask)
200 {
201 int vcpu;
203 for_each_vcpu_mask(vcpu, vcpu_dirty_mask) {
204 struct vcpu* v = d->vcpu[vcpu];
205 if (!v->is_initialised)
206 continue;
208 /* Purge TC entries.
209 FIXME: clear only if match. */
210 vcpu_purge_tr_entry(&PSCBX(v, dtlb));
211 vcpu_purge_tr_entry(&PSCBX(v, itlb));
212 }
213 }
215 // SMP: we can't assume v == current; the vcpu might move to another physical cpu,
216 // so a memory barrier is necessary.
217 // If we could guarantee that the vcpu runs only on this physical cpu
218 // (e.g. vcpu == current), smp_mb() would be unnecessary.
219 void vcpu_flush_vtlb_all(struct vcpu *v)
220 {
221 if (VMX_DOMAIN(v)) {
222 /* This code may be called to remap the shared_info and
223 grant_table shared pages from guest_physmap_remove_page()
224 in arch_memory_op() XENMEM_add_to_physmap, to implement the
225 PV-on-HVM feature. */
226 /* FIXME: This is not yet SMP-safe with respect to the p2m table. */
227 /* Purge vTLB for VT-i domain */
228 thash_purge_all(v);
229 }
230 else {
231 /* First VCPU tlb. */
232 vcpu_purge_tr_entry(&PSCBX(v,dtlb));
233 vcpu_purge_tr_entry(&PSCBX(v,itlb));
234 smp_mb();
236 /* Then VHPT. */
237 if (HAS_PERVCPU_VHPT(v->domain))
238 vcpu_vhpt_flush(v);
239 else
240 local_vhpt_flush();
241 smp_mb();
243 /* Then mTLB. */
244 local_flush_tlb_all();
245 }
247 /* We could clear the bit in d->domain_dirty_cpumask only if domain d is
248 not running on this processor. There is currently no easy way to
249 check this. */
251 perfc_incr(vcpu_flush_vtlb_all);
252 }
254 static void __vcpu_flush_vtlb_all(void *vcpu)
255 {
256 vcpu_flush_vtlb_all((struct vcpu*)vcpu);
257 }
259 // The caller must have incremented d's reference count somehow.
260 void domain_flush_vtlb_all(struct domain* d)
261 {
262 int cpu = smp_processor_id ();
263 struct vcpu *v;
265 for_each_vcpu(d, v) {
266 if (!v->is_initialised)
267 continue;
269 if (v->processor == cpu)
270 vcpu_flush_vtlb_all(v);
271 else
272 // SMP: it is racy to reference v->processor.
273 // The vcpu scheduler may move this vcpu to another
274 // physical processor and change the value
275 // using a plain store.
276 // We may be seeing a stale value of it.
277 // In that case, flush_vtlb_for_context_switch()
278 // takes care of the mTLB flush.
279 smp_call_function_single(v->processor,
280 __vcpu_flush_vtlb_all,
281 v, 1, 1);
282 }
283 perfc_incr(domain_flush_vtlb_all);
284 }
286 // Callers may need to call smp_mb() before/after calling this.
287 // Be careful.
288 static void
289 __flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range)
290 {
291 void *vhpt_base = __va(vhpt_maddr);
293 while ((long)addr_range > 0) {
294 /* Get the VHPT entry. */
295 unsigned int off = ia64_thash(vadr) -
296 __va_ul(vcpu_vhpt_maddr(current));
297 struct vhpt_lf_entry *v = vhpt_base + off;
298 v->ti_tag = INVALID_TI_TAG;
299 addr_range -= PAGE_SIZE;
300 vadr += PAGE_SIZE;
301 }
302 }
304 static void
305 cpu_flush_vhpt_range(int cpu, u64 vadr, u64 addr_range)
306 {
307 __flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range);
308 }
310 static void
311 vcpu_flush_vhpt_range(struct vcpu* v, u64 vadr, u64 addr_range)
312 {
313 __flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range);
314 }
316 void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
317 {
318 if (HAS_PERVCPU_VHPT(current->domain))
319 vcpu_flush_vhpt_range(current, vadr, 1UL << log_range);
320 else
321 cpu_flush_vhpt_range(current->processor,
322 vadr, 1UL << log_range);
323 ia64_ptcl(vadr, log_range << 2);
324 ia64_srlz_i();
325 perfc_incr(vcpu_flush_tlb_vhpt_range);
326 }
328 void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range)
329 {
330 struct vcpu *v;
332 #if 0
333 // this only seems to occur at shutdown, but it does occur
334 if ((!addr_range) || addr_range & (addr_range - 1)) {
335 printk("vhpt_flush_address: weird range, spinning...\n");
336 while(1);
337 }
338 #endif
340 domain_purge_swtc_entries(d);
341 smp_mb();
343 for_each_vcpu (d, v) {
344 if (!v->is_initialised)
345 continue;
347 if (HAS_PERVCPU_VHPT(d)) {
348 vcpu_flush_vhpt_range(v, vadr, addr_range);
349 } else {
350 // SMP: it is racy to reference v->processor.
351 // The vcpu scheduler may move this vcpu to another
352 // physical processor and change the value
353 // using a plain store, so we may be seeing a stale value.
354 // In that case, flush_vtlb_for_context_switch()
355 // takes care of the mTLB flush.
356 /* Invalidate VHPT entries. */
357 cpu_flush_vhpt_range(v->processor, vadr, addr_range);
358 }
359 }
360 /* ptc.ga has release semantics. */
363 platform_global_tlb_purge(vadr, vadr + addr_range, PAGE_SHIFT);
364 perfc_incr(domain_flush_vtlb_range);
365 }
367 #ifdef CONFIG_XEN_IA64_TLB_TRACK
368 #include <asm/tlb_track.h>
369 #include <asm/vmx_vcpu.h>
370 void
371 __domain_flush_vtlb_track_entry(struct domain* d,
372 const struct tlb_track_entry* entry)
373 {
374 unsigned long rr7_rid;
375 int swap_rr0 = 0;
376 unsigned long old_rid;
377 unsigned long vaddr = entry->vaddr;
378 struct vcpu* v;
379 int cpu;
380 int vcpu;
381 int local_purge = 1;
383 BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
384 /*
385 * Heuristic:
386 * dom0 Linux accesses grant-mapped pages via the kernel's
387 * identity-mapped (region 7) area and doesn't change the rr7 rid.
388 * So it is likely that rr7 == entry->rid and
389 * we can avoid a rid change.
390 * When blktap is supported, this heuristic should be revised.
391 */
392 vcpu_get_rr(current, VRN7 << VRN_SHIFT, &rr7_rid);
393 if (likely(rr7_rid == entry->rid)) {
394 perfc_incr(tlb_track_use_rr7);
395 } else {
396 swap_rr0 = 1;
397 vaddr = (vaddr << 3) >> 3; // force vrn0 (clear the top 3 VRN bits)
398 perfc_incr(tlb_track_swap_rr0);
399 }
401 // tlb_track_entry_printf(entry);
402 if (swap_rr0) {
403 vcpu_get_rr(current, 0, &old_rid);
404 vcpu_set_rr(current, 0, entry->rid);
405 }
407 if (HAS_PERVCPU_VHPT(d)) {
408 for_each_vcpu_mask(vcpu, entry->vcpu_dirty_mask) {
409 v = d->vcpu[vcpu];
410 if (!v->is_initialised)
411 continue;
413 /* Invalidate VHPT entries. */
414 vcpu_flush_vhpt_range(v, vaddr, PAGE_SIZE);
416 /*
417 * Checking current->processor == v->processor
418 * is racy: we may see a stale v->processor, and
419 * v's new physical processor might still see the old
420 * vhpt entry and insert it into its TLB.
421 */
422 if (v != current)
423 local_purge = 0;
424 }
425 } else {
426 for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
427 /* Invalidate VHPT entries. */
428 cpu_flush_vhpt_range(cpu, vaddr, PAGE_SIZE);
430 if (d->vcpu[cpu] != current)
431 local_purge = 0;
432 }
433 }
435 /* ptc.ga */
436 if (local_purge) {
437 ia64_ptcl(vaddr, PAGE_SHIFT << 2);
438 perfc_incr(domain_flush_vtlb_local);
439 } else {
440 /* ptc.ga has release semantics. */
441 platform_global_tlb_purge(vaddr, vaddr + PAGE_SIZE,
442 PAGE_SHIFT);
443 perfc_incr(domain_flush_vtlb_global);
444 }
446 if (swap_rr0) {
447 vcpu_set_rr(current, 0, old_rid);
448 }
449 perfc_incr(domain_flush_vtlb_track_entry);
450 }
452 void
453 domain_flush_vtlb_track_entry(struct domain* d,
454 const struct tlb_track_entry* entry)
455 {
456 domain_purge_swtc_entries_vcpu_dirty_mask(d, entry->vcpu_dirty_mask);
457 smp_mb();
459 __domain_flush_vtlb_track_entry(d, entry);
460 }
462 #endif
464 static void flush_tlb_vhpt_all (struct domain *d)
465 {
466 /* First VHPT. */
467 local_vhpt_flush ();
469 /* Then mTLB. */
470 local_flush_tlb_all ();
471 }
473 void domain_flush_tlb_vhpt(struct domain *d)
474 {
475 /* Very heavy... */
476 if (HAS_PERVCPU_VHPT(d) || d->arch.is_vti)
477 on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
478 else
479 on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
480 cpus_clear (d->domain_dirty_cpumask);
481 }
483 void flush_tlb_mask(cpumask_t mask)
484 {
485 int cpu;
487 cpu = smp_processor_id();
488 if (cpu_isset (cpu, mask)) {
489 cpu_clear(cpu, mask);
490 flush_tlb_vhpt_all (NULL);
491 }
493 if (cpus_empty(mask))
494 return;
496 for_each_cpu_mask (cpu, mask)
497 smp_call_function_single
498 (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1);
499 }
501 #ifdef PERF_COUNTERS
502 void gather_vhpt_stats(void)
503 {
504 int i, cpu;
506 perfc_set(vhpt_nbr_entries, VHPT_NUM_ENTRIES);
508 for_each_present_cpu (cpu) {
509 struct vhpt_lf_entry *v = __va(per_cpu(vhpt_paddr, cpu));
510 unsigned long vhpt_valid = 0;
512 for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
513 if (!(v->ti_tag & INVALID_TI_TAG))
514 vhpt_valid++;
515 per_cpu(perfcounters, cpu)[PERFC_vhpt_valid_entries] = vhpt_valid;
516 }
517 }
518 #endif