ia64/xen-unstable

xen/arch/ia64/xen/vhpt.c @ 19848:5839491bbf20

[IA64] replace MAX_VCPUS with d->max_vcpus where necessary.

Don't use MAX_VCPUS; use d->max_vcpus instead.
The changeset 2f9e1348aa98 introduced max_vcpus to allow more vcpus
per guest. This patch is the ia64 counterpart.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Isaku Yamahata <yamahata@valinux.co.jp>
date Mon Jun 29 11:26:05 2009 +0900 (2009-06-29)
parents 7dfc0a20fa59
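The sketch below illustrates the kind of substitution this changeset describes: bounding a per-domain vcpu walk by d->max_vcpus rather than the compile-time MAX_VCPUS limit. It is illustrative only, not one of the actual hunks of this patch; the function name is hypothetical, and it simply reuses helpers that appear later in this file (d->vcpu[], v->is_initialised, vcpu_vhpt_flush).

/* Illustrative sketch only -- not part of this changeset's diff.
 * It shows the pattern the commit message describes: iterate up to the
 * per-domain d->max_vcpus bound instead of the global MAX_VCPUS constant.
 * The function name is made up for this example. */
static void example_flush_all_pervcpu_vhpts(struct domain *d)
{
        int i;

        /* Before this kind of change: for (i = 0; i < MAX_VCPUS; i++) */
        for (i = 0; i < d->max_vcpus; i++) {
                struct vcpu *v = d->vcpu[i];

                if (v == NULL || !v->is_initialised)
                        continue;
                vcpu_vhpt_flush(v);
        }
}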
/*
 * Initialize VHPT support.
 *
 * Copyright (C) 2004 Hewlett-Packard Co
 *      Dan Magenheimer <dan.magenheimer@hp.com>
 *
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *                    per vcpu vhpt support
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/vhpt.h>
#include <asm/vcpu.h>
#include <asm/vcpumask.h>
#include <asm/vmmu.h>

DEFINE_PER_CPU (unsigned long, vhpt_paddr);
DEFINE_PER_CPU (unsigned long, vhpt_pend);
#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
DEFINE_PER_CPU(volatile u32, vhpt_tlbflush_timestamp);
#endif
static void
__vhpt_flush(unsigned long vhpt_maddr, unsigned long vhpt_size_log2)
{
        struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
        unsigned long num_entries = 1 << (vhpt_size_log2 - 5);
        int i;

        for (i = 0; i < num_entries; i++, v++)
                v->ti_tag = INVALID_TI_TAG;
}
void
local_vhpt_flush(void)
{
        /* increment flush clock before flush */
        u32 flush_time = tlbflush_clock_inc_and_return();
        __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr), VHPT_SIZE_LOG2);
        /* this must be after flush */
        tlbflush_update_time(&__get_cpu_var(vhpt_tlbflush_timestamp),
                             flush_time);
        perfc_incr(local_vhpt_flush);
}
void
vcpu_vhpt_flush(struct vcpu* v)
{
        unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
        if (HAS_PERVCPU_VHPT(v->domain))
                vhpt_size_log2 = v->arch.pta.size;
#endif
        __vhpt_flush(vcpu_vhpt_maddr(v), vhpt_size_log2);
        perfc_incr(vcpu_vhpt_flush);
}
static void
vhpt_erase(unsigned long vhpt_maddr, unsigned long vhpt_size_log2)
{
        struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
        unsigned long num_entries = 1 << (vhpt_size_log2 - 5);
        int i;

        for (i = 0; i < num_entries; i++, v++) {
                v->itir = 0;
                v->CChain = 0;
                v->page_flags = 0;
                v->ti_tag = INVALID_TI_TAG;
        }
        // initialize cache too???
}
void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long itir)
{
        struct vhpt_lf_entry *vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
        unsigned long tag = ia64_ttag (vadr);

        /* Even though VHPT is per VCPU, still need to first disable the entry,
         * because the processor may support speculative VHPT walk.  */
        vlfe->ti_tag = INVALID_TI_TAG;
        wmb();
        vlfe->itir = itir;
        vlfe->page_flags = pte | _PAGE_P;
        *(volatile unsigned long*)&vlfe->ti_tag = tag;
}
void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte,
                          unsigned long itir)
{
        unsigned char ps = current->arch.vhpt_pg_shift;
        ia64_itir_t _itir = {.itir = itir};
        unsigned long mask = (1L << _itir.ps) - 1;
        int i;

        if (_itir.ps - ps > 10 && !running_on_sim) {
                // if this happens, we may want to revisit this algorithm
                panic("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
        }
        if (_itir.ps - ps > 2) {
                // FIXME: Should add counter here to see how often this
                //  happens (e.g. for 16MB pages!) and determine if it
                //  is a performance problem.  On a quick look, it takes
                //  about 39000 instrs for a 16MB page and it seems to occur
                //  only a few times/second, so OK for now.
                //  An alternate solution would be to just insert the one
                //  16KB in the vhpt (but with the full mapping)?
                //printk("vhpt_multiple_insert: logps-PAGE_SHIFT==%d,"
                //       "va=%p, pa=%p, pa-masked=%p\n",
                //       logps-PAGE_SHIFT,vaddr,pte&_PFN_MASK,
                //       (pte&_PFN_MASK)&~mask);
        }
        vaddr &= ~mask;
        pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
        for (i = 1L << (_itir.ps - ps); i > 0; i--) {
                vhpt_insert(vaddr, pte, _itir.itir);
                vaddr += (1L << ps);
        }
}
void __init vhpt_init(void)
{
        unsigned long paddr;
        struct page_info *page;
#if !VHPT_ENABLED
        return;
#endif
        /* This allocation only holds true if the vhpt table is unique for
         * all domains. Or else later a new vhpt table should be allocated
         * from the domain heap when each domain is created. Assume the xen
         * buddy allocator can provide a naturally aligned page by order?
         */
        page = alloc_domheap_pages(NULL, VHPT_SIZE_LOG2 - PAGE_SHIFT, 0);
        if (!page)
                panic("vhpt_init: can't allocate VHPT!\n");
        paddr = page_to_maddr(page);
        if (paddr & ((1 << VHPT_SIZE_LOG2) - 1))
                panic("vhpt_init: bad VHPT alignment!\n");
        __get_cpu_var(vhpt_paddr) = paddr;
        __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
        printk(XENLOG_DEBUG "vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
               paddr, __get_cpu_var(vhpt_pend));
        vhpt_erase(paddr, VHPT_SIZE_LOG2);
        // we don't enable VHPT here.
        // context_switch() or schedule_tail() does it.
}
#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
void
domain_set_vhpt_size(struct domain *d, int8_t vhpt_size_log2)
{
        if (vhpt_size_log2 == -1) {
                d->arch.has_pervcpu_vhpt = 0;
                printk(XENLOG_INFO "XEN_DOMCTL_arch_setup: "
                       "domain %d VHPT is global.\n", d->domain_id);
        } else {
                d->arch.has_pervcpu_vhpt = 1;
                d->arch.vhpt_size_log2 = vhpt_size_log2;
                printk(XENLOG_INFO "XEN_DOMCTL_arch_setup: "
                       "domain %d VHPT is per vcpu. size=2**%d\n",
                       d->domain_id, vhpt_size_log2);
        }
}

int
pervcpu_vhpt_alloc(struct vcpu *v)
{
        unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;

        if (v->domain->arch.vhpt_size_log2 > 0)
                vhpt_size_log2 =
                        canonicalize_vhpt_size(v->domain->arch.vhpt_size_log2);
        printk(XENLOG_DEBUG "%s vhpt_size_log2=%ld\n",
               __func__, vhpt_size_log2);
        v->arch.vhpt_entries =
                (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
        v->arch.vhpt_page =
                alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0);
        if (!v->arch.vhpt_page)
                return -ENOMEM;

        v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page);
        if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1))
                panic("pervcpu_vhpt_init: bad VHPT alignment!\n");

        v->arch.pta.val = 0;  // to zero reserved bits
        v->arch.pta.ve = 1;   // enable vhpt
        v->arch.pta.size = vhpt_size_log2;
        v->arch.pta.vf = 1;   // long format
        v->arch.pta.base = __va_ul(v->arch.vhpt_maddr) >> 15;

        vhpt_erase(v->arch.vhpt_maddr, vhpt_size_log2);
        smp_mb(); // per vcpu vhpt may be used by another physical cpu.
        return 0;
}

void
pervcpu_vhpt_free(struct vcpu *v)
{
        if (likely(v->arch.vhpt_page != NULL))
                free_domheap_pages(v->arch.vhpt_page,
                                   v->arch.pta.size - PAGE_SHIFT);
}
#endif
void
domain_purge_swtc_entries(struct domain *d)
{
        struct vcpu* v;
        for_each_vcpu(d, v) {
                if (!v->is_initialised)
                        continue;

                /* Purge TC entries.
                   FIXME: clear only if match.  */
                vcpu_purge_tr_entry(&PSCBX(v,dtlb));
                vcpu_purge_tr_entry(&PSCBX(v,itlb));
        }
}

void
domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d,
                                          vcpumask_t vcpu_dirty_mask)
{
        int vcpu;

        for_each_vcpu_mask(d, vcpu, vcpu_dirty_mask) {
                struct vcpu* v = d->vcpu[vcpu];
                if (!v->is_initialised)
                        continue;

                /* Purge TC entries.
                   FIXME: clear only if match.  */
                vcpu_purge_tr_entry(&PSCBX(v, dtlb));
                vcpu_purge_tr_entry(&PSCBX(v, itlb));
        }
}
// SMP: we can't assume v == current, the vcpu might move to another physical cpu.
// So a memory barrier is necessary.
// If we can guarantee that the vcpu can run only on this physical cpu
// (e.g. vcpu == current), smp_mb() is unnecessary.
void vcpu_flush_vtlb_all(struct vcpu *v)
{
        /* First VCPU tlb.  */
        vcpu_purge_tr_entry(&PSCBX(v,dtlb));
        vcpu_purge_tr_entry(&PSCBX(v,itlb));
        smp_mb();

        /* Then VHPT.  */
        if (HAS_PERVCPU_VHPT(v->domain))
                vcpu_vhpt_flush(v);
        else
                local_vhpt_flush();
        smp_mb();

        /* Then mTLB.  */
        local_flush_tlb_all();

        /* We could clear the bit in d->domain_dirty_cpumask only if domain d
           is not running on this processor.  There is currently no easy way
           to check this.  */

        perfc_incr(vcpu_flush_vtlb_all);
}
static void __vcpu_flush_vtlb_all(void *vcpu)
{
        vcpu_flush_vtlb_all((struct vcpu*)vcpu);
}

// The caller must have incremented the reference count of d somehow.
void domain_flush_vtlb_all(struct domain* d)
{
        int cpu = smp_processor_id ();
        struct vcpu *v;

        for_each_vcpu(d, v) {
                if (!v->is_initialised)
                        continue;

                if (VMX_DOMAIN(v)) {
                        // This code may be called for remapping shared_info
                        // and grant_table from guest_physmap_remove_page()
                        // in arch_memory_op() XENMEM_add_to_physmap to realize
                        // the PV-on-HVM feature.
                        vmx_vcpu_flush_vtlb_all(v);
                        continue;
                }

                if (v->processor == cpu)
                        vcpu_flush_vtlb_all(v);
                else
                        // SMP: it is racy to reference v->processor.
                        // The vcpu scheduler may move this vcpu to another
                        // physical processor and change the value with a
                        // plain store, so we may be seeing a stale value.
                        // In such a case, flush_vtlb_for_context_switch()
                        // takes care of the mTLB flush.
                        smp_call_function_single(v->processor,
                                                 __vcpu_flush_vtlb_all,
                                                 v, 1);
        }
        perfc_incr(domain_flush_vtlb_all);
}
// Callers may need to call smp_mb() before/after calling this.
// Be careful.
static void
__flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range)
{
        void *vhpt_base = __va(vhpt_maddr);
        u64 pgsz = 1L << current->arch.vhpt_pg_shift;
        u64 purge_addr = vadr & PAGE_MASK;

        addr_range += vadr - purge_addr;
        addr_range = PAGE_ALIGN(addr_range);
        while ((long)addr_range > 0) {
                /* Get the VHPT entry.  */
                unsigned int off = ia64_thash(purge_addr) -
                        __va_ul(vcpu_vhpt_maddr(current));
                struct vhpt_lf_entry *v = vhpt_base + off;
                v->ti_tag = INVALID_TI_TAG;
                addr_range -= pgsz;
                purge_addr += pgsz;
        }
}

static void
cpu_flush_vhpt_range(int cpu, u64 vadr, u64 addr_range)
{
        __flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range);
}

static void
vcpu_flush_vhpt_range(struct vcpu* v, u64 vadr, u64 addr_range)
{
        __flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range);
}
void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
{
        if (HAS_PERVCPU_VHPT(current->domain))
                vcpu_flush_vhpt_range(current, vadr, 1UL << log_range);
        else
                cpu_flush_vhpt_range(current->processor,
                                     vadr, 1UL << log_range);
        ia64_ptcl(vadr, log_range << 2);
        ia64_srlz_i();
        perfc_incr(vcpu_flush_tlb_vhpt_range);
}
void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range)
{
        struct vcpu *v;

#if 0
        // this only seems to occur at shutdown, but it does occur
        if ((!addr_range) || addr_range & (addr_range - 1)) {
                printk("vhpt_flush_address: weird range, spinning...\n");
                while(1);
        }
#endif

        domain_purge_swtc_entries(d);
        smp_mb();

        for_each_vcpu (d, v) {
                if (!v->is_initialised)
                        continue;

                if (HAS_PERVCPU_VHPT(d)) {
                        vcpu_flush_vhpt_range(v, vadr, addr_range);
                } else {
                        // SMP: it is racy to reference v->processor.
                        // The vcpu scheduler may move this vcpu to another
                        // physical processor and change the value with a
                        // plain store, so we may be seeing a stale value.
                        // In such a case, flush_vtlb_for_context_switch()
                        // takes care of the mTLB flush.
                        /* Invalidate VHPT entries.  */
                        cpu_flush_vhpt_range(v->processor, vadr, addr_range);
                }
        }
        // ptc.ga has release semantics.

        /* ptc.ga */
        platform_global_tlb_purge(vadr, vadr + addr_range,
                                  current->arch.vhpt_pg_shift);
        perfc_incr(domain_flush_vtlb_range);
}
#ifdef CONFIG_XEN_IA64_TLB_TRACK
#include <asm/tlb_track.h>
#include <asm/vmx_vcpu.h>
void
__domain_flush_vtlb_track_entry(struct domain* d,
                                const struct tlb_track_entry* entry)
{
        unsigned long rr7_rid;
        int swap_rr0 = 0;
        unsigned long old_rid;
        unsigned long vaddr = entry->vaddr;
        struct vcpu* v;
        int cpu;
        int vcpu;
        int local_purge = 1;

        /* TLB insert tracking is done in PAGE_SIZE units.  */
        unsigned char ps = max_t(unsigned char,
                                 current->arch.vhpt_pg_shift, PAGE_SHIFT);
        /* This case isn't supported (yet).  */
        BUG_ON(current->arch.vhpt_pg_shift > PAGE_SHIFT);

        BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
        /*
         * heuristic:
         * dom0linux accesses grant mapped pages via the kernel
         * straight mapped area and it doesn't change the rr7 rid.
         * So it is likely that rr7 == entry->rid, so that
         * we can avoid a rid change.
         * When blktap is supported, this heuristic should be revised.
         */
        vcpu_get_rr(current, VRN7 << VRN_SHIFT, &rr7_rid);
        if (likely(rr7_rid == entry->rid)) {
                perfc_incr(tlb_track_use_rr7);
        } else {
                swap_rr0 = 1;
                vaddr = (vaddr << 3) >> 3; // force vrn0
                perfc_incr(tlb_track_swap_rr0);
        }

        // tlb_track_entry_printf(entry);
        if (swap_rr0) {
                vcpu_get_rr(current, 0, &old_rid);
                vcpu_set_rr(current, 0, entry->rid);
        }

        if (HAS_PERVCPU_VHPT(d)) {
                for_each_vcpu_mask(d, vcpu, entry->vcpu_dirty_mask) {
                        v = d->vcpu[vcpu];
                        if (!v->is_initialised)
                                continue;

                        /* Invalidate VHPT entries.  */
                        vcpu_flush_vhpt_range(v, vaddr, 1L << ps);

                        /*
                         * current->processor == v->processor
                         * is racy. We may see an old v->processor, and
                         * a new physical processor of v might see an old
                         * vhpt entry and insert a tlb entry.
                         */
                        if (v != current)
                                local_purge = 0;
                }
        } else {
                for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
                        /* Invalidate VHPT entries.  */
                        cpu_flush_vhpt_range(cpu, vaddr, 1L << ps);

                        if (d->vcpu[cpu] != current)
                                local_purge = 0;
                }
        }

        /* ptc.ga */
        if (local_purge) {
                ia64_ptcl(vaddr, ps << 2);
                perfc_incr(domain_flush_vtlb_local);
        } else {
                /* ptc.ga has release semantics.  */
                platform_global_tlb_purge(vaddr, vaddr + (1L << ps), ps);
                perfc_incr(domain_flush_vtlb_global);
        }

        if (swap_rr0) {
                vcpu_set_rr(current, 0, old_rid);
        }
        perfc_incr(domain_flush_vtlb_track_entry);
}

void
domain_flush_vtlb_track_entry(struct domain* d,
                              const struct tlb_track_entry* entry)
{
        domain_purge_swtc_entries_vcpu_dirty_mask(d, entry->vcpu_dirty_mask);
        smp_mb();

        __domain_flush_vtlb_track_entry(d, entry);
}

#endif
static void flush_tlb_vhpt_all (struct domain *d)
{
        /* First VHPT.  */
        local_vhpt_flush ();

        /* Then mTLB.  */
        local_flush_tlb_all ();
}

void domain_flush_tlb_vhpt(struct domain *d)
{
        /* Very heavy...  */
        if (HAS_PERVCPU_VHPT(d) || is_hvm_domain(d))
                on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
        else
                on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1);
        cpus_clear (d->domain_dirty_cpumask);
}
void flush_tlb_for_log_dirty(struct domain *d)
{
        struct vcpu *v;

        /* NB. There is no race because all vcpus are paused.  */
        if (is_hvm_domain(d)) {
                for_each_vcpu (d, v) {
                        if (!v->is_initialised)
                                continue;
                        /* XXX: local_flush_tlb_all is called redundantly */
                        thash_purge_all(v);
                }
                smp_call_function((void (*)(void *))local_flush_tlb_all,
                                  NULL, 1);
        } else if (HAS_PERVCPU_VHPT(d)) {
                for_each_vcpu (d, v) {
                        if (!v->is_initialised)
                                continue;
                        vcpu_purge_tr_entry(&PSCBX(v,dtlb));
                        vcpu_purge_tr_entry(&PSCBX(v,itlb));
                        vcpu_vhpt_flush(v);
                }
                on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
        } else {
                on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1);
        }
        cpus_clear (d->domain_dirty_cpumask);
}
void flush_tlb_mask(const cpumask_t *mask)
{
        int cpu;

        cpu = smp_processor_id();
        if (cpu_isset(cpu, *mask))
                flush_tlb_vhpt_all (NULL);

        if (cpus_subset(*mask, *cpumask_of(cpu)))
                return;

        for_each_cpu_mask (cpu, *mask)
                if (cpu != smp_processor_id())
                        smp_call_function_single
                                (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1);
}
#ifdef PERF_COUNTERS
void gather_vhpt_stats(void)
{
        int i, cpu;

        perfc_set(vhpt_nbr_entries, VHPT_NUM_ENTRIES);

        for_each_present_cpu (cpu) {
                struct vhpt_lf_entry *v = __va(per_cpu(vhpt_paddr, cpu));
                unsigned long vhpt_valid = 0;

                for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
                        if (!(v->ti_tag & INVALID_TI_TAG))
                                vhpt_valid++;
                per_cpu(perfcounters, cpu)[PERFC_vhpt_valid_entries] = vhpt_valid;
        }
}
#endif